1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #if defined(PETSC_HAVE_PLAPACK) 9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 10 static MPI_Comm Plapack_comm_2d; 11 EXTERN_C_BEGIN 12 #include <PLA.h> 13 EXTERN_C_END 14 15 typedef struct { 16 PLA_Obj A,pivots; 17 PLA_Template templ; 18 MPI_Datatype datatype; 19 PetscInt nb,rstart; 20 VecScatter ctx; 21 IS is_pla,is_petsc; 22 PetscBool pla_solved; 23 MatStructure mstruct; 24 } Mat_Plapack; 25 #endif 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatDenseGetLocalMatrix" 29 /*@ 30 31 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 32 matrix that represents the operator. For sequential matrices it returns itself. 33 34 Input Parameter: 35 . A - the Seq or MPI dense matrix 36 37 Output Parameter: 38 . B - the inner matrix 39 40 Level: intermediate 41 42 @*/ 43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscBool flg; 48 49 PetscFunctionBegin; 50 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 51 if (flg) { 52 *B = mat->A; 53 } else { 54 *B = A; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatGetRow_MPIDense" 61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 62 { 63 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 66 67 PetscFunctionBegin; 68 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 69 lrow = row - rstart; 70 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatRestoreRow_MPIDense" 76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 82 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 83 PetscFunctionReturn(0); 84 } 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 89 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *B) 90 { 91 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 92 PetscErrorCode ierr; 93 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 94 PetscScalar *array; 95 MPI_Comm comm; 96 97 PetscFunctionBegin; 98 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 99 100 /* The reuse aspect is not implemented efficiently */ 101 if (reuse) { ierr = MatDestroy(B);CHKERRQ(ierr);} 102 103 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 104 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 105 ierr = MatCreate(comm,B);CHKERRQ(ierr); 106 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 107 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 108 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 109 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 110 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 111 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 113 *iscopy = PETSC_TRUE; 114 PetscFunctionReturn(0); 115 } 116 EXTERN_C_END 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPIDense" 120 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 123 PetscErrorCode ierr; 124 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 125 PetscBool roworiented = A->roworiented; 126 127 PetscFunctionBegin; 128 if (v) PetscValidScalarPointer(v,6); 129 for (i=0; i<m; i++) { 130 if (idxm[i] < 0) continue; 131 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 132 if (idxm[i] >= rstart && idxm[i] < rend) { 133 row = idxm[i] - rstart; 134 if (roworiented) { 135 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 136 } else { 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 141 } 142 } 143 } else { 144 if (!A->donotstash) { 145 if (roworiented) { 146 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 147 } else { 148 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 149 } 150 } 151 } 152 } 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatGetValues_MPIDense" 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscErrorCode ierr; 162 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 163 164 PetscFunctionBegin; 165 for (i=0; i<m; i++) { 166 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 167 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 168 if (idxm[i] >= rstart && idxm[i] < rend) { 169 row = idxm[i] - rstart; 170 for (j=0; j<n; j++) { 171 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 172 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 173 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 174 } 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatGetArray_MPIDense" 182 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 183 { 184 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 185 PetscErrorCode ierr; 186 187 PetscFunctionBegin; 188 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 194 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 195 { 196 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 197 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 198 PetscErrorCode ierr; 199 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 200 const PetscInt *irow,*icol; 201 PetscScalar *av,*bv,*v = lmat->v; 202 Mat newmat; 203 IS iscol_local; 204 205 PetscFunctionBegin; 206 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 207 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 208 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 209 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 211 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 212 213 /* No parallel redistribution currently supported! Should really check each index set 214 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 215 original matrix! */ 216 217 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 218 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 219 220 /* Check submatrix call */ 221 if (scall == MAT_REUSE_MATRIX) { 222 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 223 /* Really need to test rows and column sizes! */ 224 newmat = *B; 225 } else { 226 /* Create and fill new matrix */ 227 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 228 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 229 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 230 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 231 } 232 233 /* Now extract the data pointers and do the copy, column at a time */ 234 newmatd = (Mat_MPIDense*)newmat->data; 235 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 236 237 for (i=0; i<Ncols; i++) { 238 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 239 for (j=0; j<nrows; j++) { 240 *bv++ = av[irow[j] - rstart]; 241 } 242 } 243 244 /* Assemble the matrices so that the correct flags are set */ 245 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 246 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 248 /* Free work space */ 249 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 250 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 251 *B = newmat; 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatRestoreArray_MPIDense" 257 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 258 { 259 PetscFunctionBegin; 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 265 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 266 { 267 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 268 MPI_Comm comm = ((PetscObject)mat)->comm; 269 PetscErrorCode ierr; 270 PetscInt nstash,reallocs; 271 InsertMode addv; 272 273 PetscFunctionBegin; 274 /* make sure all processors are either in INSERTMODE or ADDMODE */ 275 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 276 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 277 mat->insertmode = addv; /* in case this processor had no cache */ 278 279 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 280 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 281 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 287 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 288 { 289 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 290 PetscErrorCode ierr; 291 PetscInt i,*row,*col,flg,j,rstart,ncols; 292 PetscMPIInt n; 293 PetscScalar *val; 294 InsertMode addv=mat->insertmode; 295 296 PetscFunctionBegin; 297 /* wait on receives */ 298 while (1) { 299 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 300 if (!flg) break; 301 302 for (i=0; i<n;) { 303 /* Now identify the consecutive vals belonging to the same row */ 304 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 305 if (j < n) ncols = j-i; 306 else ncols = n-i; 307 /* Now assemble all these values with a single function call */ 308 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 309 i = j; 310 } 311 } 312 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 313 314 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 315 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 316 317 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 318 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatZeroEntries_MPIDense" 325 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 326 { 327 PetscErrorCode ierr; 328 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 329 330 PetscFunctionBegin; 331 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 /* the code does not do the diagonal entries correctly unless the 336 matrix is square and the column and row owerships are identical. 337 This is a BUG. The only way to fix it seems to be to access 338 mdn->A and mdn->B directly and not through the MatZeroRows() 339 routine. 340 */ 341 #undef __FUNCT__ 342 #define __FUNCT__ "MatZeroRows_MPIDense" 343 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 344 { 345 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 346 PetscErrorCode ierr; 347 PetscInt i,*owners = A->rmap->range; 348 PetscInt *nprocs,j,idx,nsends; 349 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 350 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 351 PetscInt *lens,*lrows,*values; 352 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 353 MPI_Comm comm = ((PetscObject)A)->comm; 354 MPI_Request *send_waits,*recv_waits; 355 MPI_Status recv_status,*send_status; 356 PetscBool found; 357 const PetscScalar *xx; 358 PetscScalar *bb; 359 360 PetscFunctionBegin; 361 /* first count number of contributors to each processor */ 362 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 363 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 364 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 365 for (i=0; i<N; i++) { 366 idx = rows[i]; 367 found = PETSC_FALSE; 368 for (j=0; j<size; j++) { 369 if (idx >= owners[j] && idx < owners[j+1]) { 370 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 371 } 372 } 373 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 374 } 375 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 376 377 /* inform other processors of number of messages and max length*/ 378 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 379 380 /* post receives: */ 381 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 382 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 383 for (i=0; i<nrecvs; i++) { 384 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 385 } 386 387 /* do sends: 388 1) starts[i] gives the starting index in svalues for stuff going to 389 the ith processor 390 */ 391 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 392 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 393 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 394 starts[0] = 0; 395 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 396 for (i=0; i<N; i++) { 397 svalues[starts[owner[i]]++] = rows[i]; 398 } 399 400 starts[0] = 0; 401 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 402 count = 0; 403 for (i=0; i<size; i++) { 404 if (nprocs[2*i+1]) { 405 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 406 } 407 } 408 ierr = PetscFree(starts);CHKERRQ(ierr); 409 410 base = owners[rank]; 411 412 /* wait on receives */ 413 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 414 count = nrecvs; 415 slen = 0; 416 while (count) { 417 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 418 /* unpack receives into our local space */ 419 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 420 source[imdex] = recv_status.MPI_SOURCE; 421 lens[imdex] = n; 422 slen += n; 423 count--; 424 } 425 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 426 427 /* move the data into the send scatter */ 428 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 429 count = 0; 430 for (i=0; i<nrecvs; i++) { 431 values = rvalues + i*nmax; 432 for (j=0; j<lens[i]; j++) { 433 lrows[count++] = values[j] - base; 434 } 435 } 436 ierr = PetscFree(rvalues);CHKERRQ(ierr); 437 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 438 ierr = PetscFree(owner);CHKERRQ(ierr); 439 ierr = PetscFree(nprocs);CHKERRQ(ierr); 440 441 /* fix right hand side if needed */ 442 if (x && b) { 443 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 444 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 445 for (i=0; i<slen; i++) { 446 bb[lrows[i]] = diag*xx[lrows[i]]; 447 } 448 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 449 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 450 } 451 452 /* actually zap the local rows */ 453 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 454 ierr = PetscFree(lrows);CHKERRQ(ierr); 455 456 /* wait on sends */ 457 if (nsends) { 458 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 459 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 460 ierr = PetscFree(send_status);CHKERRQ(ierr); 461 } 462 ierr = PetscFree(send_waits);CHKERRQ(ierr); 463 ierr = PetscFree(svalues);CHKERRQ(ierr); 464 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatMult_MPIDense" 470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 471 { 472 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 473 PetscErrorCode ierr; 474 475 PetscFunctionBegin; 476 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatMultAdd_MPIDense" 484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatMultTranspose_MPIDense" 498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 499 { 500 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 501 PetscErrorCode ierr; 502 PetscScalar zero = 0.0; 503 504 PetscFunctionBegin; 505 ierr = VecSet(yy,zero);CHKERRQ(ierr); 506 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 507 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 508 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 PetscErrorCode ierr; 518 519 PetscFunctionBegin; 520 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 521 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 522 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 523 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatGetDiagonal_MPIDense" 529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 532 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 533 PetscErrorCode ierr; 534 PetscInt len,i,n,m = A->rmap->n,radd; 535 PetscScalar *x,zero = 0.0; 536 537 PetscFunctionBegin; 538 ierr = VecSet(v,zero);CHKERRQ(ierr); 539 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 540 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 541 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 542 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 543 radd = A->rmap->rstart*m; 544 for (i=0; i<len; i++) { 545 x[i] = aloc->v[radd + i*m + i]; 546 } 547 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatDestroy_MPIDense" 553 PetscErrorCode MatDestroy_MPIDense(Mat mat) 554 { 555 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 556 PetscErrorCode ierr; 557 #if defined(PETSC_HAVE_PLAPACK) 558 Mat_Plapack *lu=(Mat_Plapack*)mat->spptr; 559 #endif 560 561 PetscFunctionBegin; 562 563 #if defined(PETSC_USE_LOG) 564 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 565 #endif 566 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 567 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 568 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 569 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 570 #if defined(PETSC_HAVE_PLAPACK) 571 if (lu) { 572 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 573 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 574 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 575 ierr = ISDestroy(&lu->is_pla);CHKERRQ(ierr); 576 ierr = ISDestroy(&lu->is_petsc);CHKERRQ(ierr); 577 ierr = VecScatterDestroy(&lu->ctx);CHKERRQ(ierr); 578 } 579 ierr = PetscFree(mat->spptr);CHKERRQ(ierr); 580 #endif 581 582 ierr = PetscFree(mat->data);CHKERRQ(ierr); 583 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 589 PetscFunctionReturn(0); 590 } 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatView_MPIDense_Binary" 594 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 595 { 596 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 597 PetscErrorCode ierr; 598 PetscViewerFormat format; 599 int fd; 600 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 601 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 602 PetscScalar *work,*v,*vv; 603 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 604 MPI_Status status; 605 606 PetscFunctionBegin; 607 if (mdn->size == 1) { 608 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 609 } else { 610 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 611 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 612 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 613 614 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 615 if (format == PETSC_VIEWER_NATIVE) { 616 617 if (!rank) { 618 /* store the matrix as a dense matrix */ 619 header[0] = MAT_FILE_CLASSID; 620 header[1] = mat->rmap->N; 621 header[2] = N; 622 header[3] = MATRIX_BINARY_FORMAT_DENSE; 623 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 624 625 /* get largest work array needed for transposing array */ 626 mmax = mat->rmap->n; 627 for (i=1; i<size; i++) { 628 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 629 } 630 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 631 632 /* write out local array, by rows */ 633 m = mat->rmap->n; 634 v = a->v; 635 for (j=0; j<N; j++) { 636 for (i=0; i<m; i++) { 637 work[j + i*N] = *v++; 638 } 639 } 640 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 641 /* get largest work array to receive messages from other processes, excludes process zero */ 642 mmax = 0; 643 for (i=1; i<size; i++) { 644 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 645 } 646 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 647 for(k = 1; k < size; k++) { 648 v = vv; 649 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 650 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 651 652 for(j = 0; j < N; j++) { 653 for(i = 0; i < m; i++) { 654 work[j + i*N] = *v++; 655 } 656 } 657 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 658 } 659 ierr = PetscFree(work);CHKERRQ(ierr); 660 ierr = PetscFree(vv);CHKERRQ(ierr); 661 } else { 662 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 666 } 667 } 668 PetscFunctionReturn(0); 669 } 670 671 #undef __FUNCT__ 672 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 673 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 674 { 675 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 676 PetscErrorCode ierr; 677 PetscMPIInt size = mdn->size,rank = mdn->rank; 678 const PetscViewerType vtype; 679 PetscBool iascii,isdraw; 680 PetscViewer sviewer; 681 PetscViewerFormat format; 682 #if defined(PETSC_HAVE_PLAPACK) 683 Mat_Plapack *lu; 684 #endif 685 686 PetscFunctionBegin; 687 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 688 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 689 if (iascii) { 690 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 691 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 692 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 693 MatInfo info; 694 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 695 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 696 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 697 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 698 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 699 #if defined(PETSC_HAVE_PLAPACK) 700 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 701 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 702 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 703 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 704 if (mat->factortype){ 705 lu=(Mat_Plapack*)(mat->spptr); 706 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 707 } 708 #else 709 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 710 #endif 711 PetscFunctionReturn(0); 712 } else if (format == PETSC_VIEWER_ASCII_INFO) { 713 PetscFunctionReturn(0); 714 } 715 } else if (isdraw) { 716 PetscDraw draw; 717 PetscBool isnull; 718 719 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 720 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 721 if (isnull) PetscFunctionReturn(0); 722 } 723 724 if (size == 1) { 725 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 726 } else { 727 /* assemble the entire matrix onto first processor. */ 728 Mat A; 729 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 730 PetscInt *cols; 731 PetscScalar *vals; 732 733 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 734 if (!rank) { 735 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 736 } else { 737 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 738 } 739 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 740 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 741 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 742 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 743 744 /* Copy the matrix ... This isn't the most efficient means, 745 but it's quick for now */ 746 A->insertmode = INSERT_VALUES; 747 row = mat->rmap->rstart; m = mdn->A->rmap->n; 748 for (i=0; i<m; i++) { 749 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 750 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 752 row++; 753 } 754 755 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 756 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 757 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 758 if (!rank) { 759 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 760 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 761 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 762 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 763 } 764 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 765 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 766 ierr = MatDestroy(&A);CHKERRQ(ierr); 767 } 768 PetscFunctionReturn(0); 769 } 770 771 #undef __FUNCT__ 772 #define __FUNCT__ "MatView_MPIDense" 773 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 774 { 775 PetscErrorCode ierr; 776 PetscBool iascii,isbinary,isdraw,issocket; 777 778 PetscFunctionBegin; 779 780 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 781 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 782 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 783 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 784 785 if (iascii || issocket || isdraw) { 786 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 787 } else if (isbinary) { 788 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 789 } else { 790 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatGetInfo_MPIDense" 797 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 798 { 799 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 800 Mat mdn = mat->A; 801 PetscErrorCode ierr; 802 PetscReal isend[5],irecv[5]; 803 804 PetscFunctionBegin; 805 info->block_size = 1.0; 806 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 807 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 808 isend[3] = info->memory; isend[4] = info->mallocs; 809 if (flag == MAT_LOCAL) { 810 info->nz_used = isend[0]; 811 info->nz_allocated = isend[1]; 812 info->nz_unneeded = isend[2]; 813 info->memory = isend[3]; 814 info->mallocs = isend[4]; 815 } else if (flag == MAT_GLOBAL_MAX) { 816 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 817 info->nz_used = irecv[0]; 818 info->nz_allocated = irecv[1]; 819 info->nz_unneeded = irecv[2]; 820 info->memory = irecv[3]; 821 info->mallocs = irecv[4]; 822 } else if (flag == MAT_GLOBAL_SUM) { 823 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 824 info->nz_used = irecv[0]; 825 info->nz_allocated = irecv[1]; 826 info->nz_unneeded = irecv[2]; 827 info->memory = irecv[3]; 828 info->mallocs = irecv[4]; 829 } 830 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 831 info->fill_ratio_needed = 0; 832 info->factor_mallocs = 0; 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatSetOption_MPIDense" 838 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 839 { 840 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 841 PetscErrorCode ierr; 842 843 PetscFunctionBegin; 844 switch (op) { 845 case MAT_NEW_NONZERO_LOCATIONS: 846 case MAT_NEW_NONZERO_LOCATION_ERR: 847 case MAT_NEW_NONZERO_ALLOCATION_ERR: 848 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 849 break; 850 case MAT_ROW_ORIENTED: 851 a->roworiented = flg; 852 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 853 break; 854 case MAT_NEW_DIAGONALS: 855 case MAT_USE_HASH_TABLE: 856 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 857 break; 858 case MAT_IGNORE_OFF_PROC_ENTRIES: 859 a->donotstash = flg; 860 break; 861 case MAT_SYMMETRIC: 862 case MAT_STRUCTURALLY_SYMMETRIC: 863 case MAT_HERMITIAN: 864 case MAT_SYMMETRY_ETERNAL: 865 case MAT_IGNORE_LOWER_TRIANGULAR: 866 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 867 break; 868 default: 869 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 870 } 871 PetscFunctionReturn(0); 872 } 873 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "MatDiagonalScale_MPIDense" 877 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 878 { 879 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 880 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 881 PetscScalar *l,*r,x,*v; 882 PetscErrorCode ierr; 883 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 884 885 PetscFunctionBegin; 886 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 887 if (ll) { 888 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 889 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 890 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 891 for (i=0; i<m; i++) { 892 x = l[i]; 893 v = mat->v + i; 894 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 895 } 896 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 898 } 899 if (rr) { 900 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 901 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 902 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 905 for (i=0; i<n; i++) { 906 x = r[i]; 907 v = mat->v + i*m; 908 for (j=0; j<m; j++) { (*v++) *= x;} 909 } 910 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 911 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatNorm_MPIDense" 918 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 919 { 920 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 921 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 922 PetscErrorCode ierr; 923 PetscInt i,j; 924 PetscReal sum = 0.0; 925 PetscScalar *v = mat->v; 926 927 PetscFunctionBegin; 928 if (mdn->size == 1) { 929 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 930 } else { 931 if (type == NORM_FROBENIUS) { 932 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 933 #if defined(PETSC_USE_COMPLEX) 934 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 935 #else 936 sum += (*v)*(*v); v++; 937 #endif 938 } 939 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 940 *nrm = sqrt(*nrm); 941 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 942 } else if (type == NORM_1) { 943 PetscReal *tmp,*tmp2; 944 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 945 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 946 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 947 *nrm = 0.0; 948 v = mat->v; 949 for (j=0; j<mdn->A->cmap->n; j++) { 950 for (i=0; i<mdn->A->rmap->n; i++) { 951 tmp[j] += PetscAbsScalar(*v); v++; 952 } 953 } 954 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 955 for (j=0; j<A->cmap->N; j++) { 956 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 957 } 958 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 959 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 960 } else if (type == NORM_INFINITY) { /* max row norm */ 961 PetscReal ntemp; 962 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 963 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 964 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 965 } 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatTranspose_MPIDense" 971 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 972 { 973 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 974 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 975 Mat B; 976 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 977 PetscErrorCode ierr; 978 PetscInt j,i; 979 PetscScalar *v; 980 981 PetscFunctionBegin; 982 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 983 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 984 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 985 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 986 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 987 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 988 } else { 989 B = *matout; 990 } 991 992 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 993 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 994 for (i=0; i<m; i++) rwork[i] = rstart + i; 995 for (j=0; j<n; j++) { 996 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 997 v += m; 998 } 999 ierr = PetscFree(rwork);CHKERRQ(ierr); 1000 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1001 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1002 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1003 *matout = B; 1004 } else { 1005 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 1011 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1012 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1016 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1017 { 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #if defined(PETSC_HAVE_PLAPACK) 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1029 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1030 { 1031 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1032 PetscErrorCode ierr; 1033 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1034 PetscScalar *array; 1035 PetscReal one = 1.0; 1036 1037 PetscFunctionBegin; 1038 /* Copy A into F->lu->A */ 1039 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1040 ierr = PLA_API_begin();CHKERRQ(ierr); 1041 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1042 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1043 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1044 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1045 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1046 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1047 ierr = PLA_API_end();CHKERRQ(ierr); 1048 lu->rstart = rstart; 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1054 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1055 { 1056 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1057 PetscErrorCode ierr; 1058 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1059 PetscScalar *array; 1060 PetscReal one = 1.0; 1061 1062 PetscFunctionBegin; 1063 /* Copy F into A->lu->A */ 1064 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1065 ierr = PLA_API_begin();CHKERRQ(ierr); 1066 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1067 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1068 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1069 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1070 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1071 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1072 ierr = PLA_API_end();CHKERRQ(ierr); 1073 lu->rstart = rstart; 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1079 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1080 { 1081 PetscErrorCode ierr; 1082 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1083 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1084 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1085 PLA_Obj alpha = NULL,beta = NULL; 1086 1087 PetscFunctionBegin; 1088 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1089 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1090 1091 /* 1092 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1093 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1094 */ 1095 1096 /* do the multiply in PLA */ 1097 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1098 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1099 CHKMEMQ; 1100 1101 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1102 CHKMEMQ; 1103 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1104 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1105 1106 /* 1107 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1108 */ 1109 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1115 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1116 { 1117 PetscErrorCode ierr; 1118 PetscInt m=A->rmap->n,n=B->cmap->n; 1119 Mat Cmat; 1120 1121 PetscFunctionBegin; 1122 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1123 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1124 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1125 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1126 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1127 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1128 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1129 1130 *C = Cmat; 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1136 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1137 { 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 if (scall == MAT_INITIAL_MATRIX){ 1142 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1143 } 1144 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatSolve_MPIDense" 1150 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1151 { 1152 MPI_Comm comm = ((PetscObject)A)->comm; 1153 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1154 PetscErrorCode ierr; 1155 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1156 PetscScalar *array; 1157 PetscReal one = 1.0; 1158 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1159 PLA_Obj v_pla = NULL; 1160 PetscScalar *loc_buf; 1161 Vec loc_x; 1162 1163 PetscFunctionBegin; 1164 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1165 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1166 1167 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1168 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1169 PLA_Obj_set_to_zero(v_pla); 1170 1171 /* Copy b into rhs_pla */ 1172 PLA_API_begin(); 1173 PLA_Obj_API_open(v_pla); 1174 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1175 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1176 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1177 PLA_Obj_API_close(v_pla); 1178 PLA_API_end(); 1179 1180 if (A->factortype == MAT_FACTOR_LU){ 1181 /* Apply the permutations to the right hand sides */ 1182 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1183 1184 /* Solve L y = b, overwriting b with y */ 1185 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1186 1187 /* Solve U x = y (=b), overwriting b with x */ 1188 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1189 } else { /* MAT_FACTOR_CHOLESKY */ 1190 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1191 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1192 PLA_NONUNIT_DIAG,lu->A,v_pla); 1193 } 1194 1195 /* Copy PLAPACK x into Petsc vector x */ 1196 PLA_Obj_local_length(v_pla, &loc_m); 1197 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1198 PLA_Obj_local_stride(v_pla, &loc_stride); 1199 /* 1200 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1201 */ 1202 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1203 if (!lu->pla_solved){ 1204 1205 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1206 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1207 1208 /* Create IS and cts for VecScatterring */ 1209 PLA_Obj_local_length(v_pla, &loc_m); 1210 PLA_Obj_local_stride(v_pla, &loc_stride); 1211 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1212 1213 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1214 for (i=0; i<loc_m; i+=lu->nb){ 1215 j = 0; 1216 while (j < lu->nb && i+j < loc_m){ 1217 idx_petsc[i+j] = rstart + j; j++; 1218 } 1219 rstart += size*lu->nb; 1220 } 1221 1222 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1223 1224 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr); 1225 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr); 1226 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1227 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1228 } 1229 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 1232 /* Free data */ 1233 ierr = VecDestroy(&loc_x);CHKERRQ(ierr); 1234 PLA_Obj_free(&v_pla); 1235 1236 lu->pla_solved = PETSC_TRUE; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1242 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1243 { 1244 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1245 PetscErrorCode ierr; 1246 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1247 PetscInt info_pla=0; 1248 PetscScalar *array,one = 1.0; 1249 1250 PetscFunctionBegin; 1251 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1252 PLA_Obj_free(&lu->A); 1253 PLA_Obj_free (&lu->pivots); 1254 } 1255 /* Create PLAPACK matrix object */ 1256 lu->A = NULL; lu->pivots = NULL; 1257 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1258 PLA_Obj_set_to_zero(lu->A); 1259 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1260 1261 /* Copy A into lu->A */ 1262 PLA_API_begin(); 1263 PLA_Obj_API_open(lu->A); 1264 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1265 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1266 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1267 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1268 PLA_Obj_API_close(lu->A); 1269 PLA_API_end(); 1270 1271 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1272 info_pla = PLA_LU(lu->A,lu->pivots); 1273 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1274 1275 lu->rstart = rstart; 1276 lu->mstruct = SAME_NONZERO_PATTERN; 1277 F->ops->solve = MatSolve_MPIDense; 1278 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1284 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1285 { 1286 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1287 PetscErrorCode ierr; 1288 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1289 PetscInt info_pla=0; 1290 PetscScalar *array,one = 1.0; 1291 1292 PetscFunctionBegin; 1293 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1294 PLA_Obj_free(&lu->A); 1295 } 1296 /* Create PLAPACK matrix object */ 1297 lu->A = NULL; 1298 lu->pivots = NULL; 1299 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1300 1301 /* Copy A into lu->A */ 1302 PLA_API_begin(); 1303 PLA_Obj_API_open(lu->A); 1304 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1305 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1306 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1307 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1308 PLA_Obj_API_close(lu->A); 1309 PLA_API_end(); 1310 1311 /* Factor P A -> Chol */ 1312 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1313 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1314 1315 lu->rstart = rstart; 1316 lu->mstruct = SAME_NONZERO_PATTERN; 1317 F->ops->solve = MatSolve_MPIDense; 1318 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1319 PetscFunctionReturn(0); 1320 } 1321 1322 /* Note the Petsc perm permutation is ignored */ 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1325 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1326 { 1327 PetscErrorCode ierr; 1328 PetscBool issymmetric,set; 1329 1330 PetscFunctionBegin; 1331 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1332 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1333 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 /* Note the Petsc r and c permutations are ignored */ 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1340 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1341 { 1342 PetscErrorCode ierr; 1343 PetscInt M = A->rmap->N; 1344 Mat_Plapack *lu; 1345 1346 PetscFunctionBegin; 1347 lu = (Mat_Plapack*)F->spptr; 1348 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1349 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 EXTERN_C_BEGIN 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1356 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1357 { 1358 PetscFunctionBegin; 1359 *type = MATSOLVERPLAPACK; 1360 PetscFunctionReturn(0); 1361 } 1362 EXTERN_C_END 1363 1364 EXTERN_C_BEGIN 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1367 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1368 { 1369 PetscErrorCode ierr; 1370 Mat_Plapack *lu; 1371 PetscMPIInt size; 1372 PetscInt M=A->rmap->N; 1373 1374 PetscFunctionBegin; 1375 /* Create the factorization matrix */ 1376 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1377 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1378 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1379 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1380 (*F)->spptr = (void*)lu; 1381 1382 /* Set default Plapack parameters */ 1383 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1384 lu->nb = M/size; 1385 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1386 1387 /* Set runtime options */ 1388 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1389 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1390 PetscOptionsEnd(); 1391 1392 /* Create object distribution template */ 1393 lu->templ = NULL; 1394 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1395 1396 /* Set the datatype */ 1397 #if defined(PETSC_USE_COMPLEX) 1398 lu->datatype = MPI_DOUBLE_COMPLEX; 1399 #else 1400 lu->datatype = MPI_DOUBLE; 1401 #endif 1402 1403 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1404 1405 1406 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1407 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1408 1409 if (ftype == MAT_FACTOR_LU) { 1410 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1411 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1412 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1413 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1414 (*F)->factortype = ftype; 1415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 EXTERN_C_END 1419 #endif 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1423 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1424 { 1425 #if defined(PETSC_HAVE_PLAPACK) 1426 PetscErrorCode ierr; 1427 #endif 1428 1429 PetscFunctionBegin; 1430 #if defined(PETSC_HAVE_PLAPACK) 1431 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1432 #else 1433 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1434 #endif 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatAXPY_MPIDense" 1440 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1441 { 1442 PetscErrorCode ierr; 1443 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1444 1445 PetscFunctionBegin; 1446 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatConjugate_MPIDense" 1452 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1453 { 1454 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1455 PetscErrorCode ierr; 1456 1457 PetscFunctionBegin; 1458 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } 1461 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatRealPart_MPIDense" 1464 PetscErrorCode MatRealPart_MPIDense(Mat A) 1465 { 1466 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1467 PetscErrorCode ierr; 1468 1469 PetscFunctionBegin; 1470 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1476 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1477 { 1478 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 /* -------------------------------------------------------------------*/ 1487 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1488 MatGetRow_MPIDense, 1489 MatRestoreRow_MPIDense, 1490 MatMult_MPIDense, 1491 /* 4*/ MatMultAdd_MPIDense, 1492 MatMultTranspose_MPIDense, 1493 MatMultTransposeAdd_MPIDense, 1494 0, 1495 0, 1496 0, 1497 /*10*/ 0, 1498 0, 1499 0, 1500 0, 1501 MatTranspose_MPIDense, 1502 /*15*/ MatGetInfo_MPIDense, 1503 MatEqual_MPIDense, 1504 MatGetDiagonal_MPIDense, 1505 MatDiagonalScale_MPIDense, 1506 MatNorm_MPIDense, 1507 /*20*/ MatAssemblyBegin_MPIDense, 1508 MatAssemblyEnd_MPIDense, 1509 MatSetOption_MPIDense, 1510 MatZeroEntries_MPIDense, 1511 /*24*/ MatZeroRows_MPIDense, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*29*/ MatSetUpPreallocation_MPIDense, 1517 0, 1518 0, 1519 MatGetArray_MPIDense, 1520 MatRestoreArray_MPIDense, 1521 /*34*/ MatDuplicate_MPIDense, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /*39*/ MatAXPY_MPIDense, 1527 MatGetSubMatrices_MPIDense, 1528 0, 1529 MatGetValues_MPIDense, 1530 0, 1531 /*44*/ 0, 1532 MatScale_MPIDense, 1533 0, 1534 0, 1535 0, 1536 /*49*/ 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 /*54*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*59*/ MatGetSubMatrix_MPIDense, 1547 MatDestroy_MPIDense, 1548 MatView_MPIDense, 1549 0, 1550 0, 1551 /*64*/ 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /*69*/ 0, 1557 0, 1558 0, 1559 0, 1560 0, 1561 /*74*/ 0, 1562 0, 1563 0, 1564 0, 1565 0, 1566 /*79*/ 0, 1567 0, 1568 0, 1569 0, 1570 /*83*/ MatLoad_MPIDense, 1571 0, 1572 0, 1573 0, 1574 0, 1575 0, 1576 /*89*/ 1577 #if defined(PETSC_HAVE_PLAPACK) 1578 MatMatMult_MPIDense_MPIDense, 1579 MatMatMultSymbolic_MPIDense_MPIDense, 1580 MatMatMultNumeric_MPIDense_MPIDense, 1581 #else 1582 0, 1583 0, 1584 0, 1585 #endif 1586 0, 1587 0, 1588 /*94*/ 0, 1589 0, 1590 0, 1591 0, 1592 0, 1593 /*99*/ 0, 1594 0, 1595 0, 1596 MatConjugate_MPIDense, 1597 0, 1598 /*104*/0, 1599 MatRealPart_MPIDense, 1600 MatImaginaryPart_MPIDense, 1601 0, 1602 0, 1603 /*109*/0, 1604 0, 1605 0, 1606 0, 1607 0, 1608 /*114*/0, 1609 0, 1610 0, 1611 0, 1612 0, 1613 /*119*/0, 1614 0, 1615 0, 1616 0 1617 }; 1618 1619 EXTERN_C_BEGIN 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1622 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1623 { 1624 Mat_MPIDense *a; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 mat->preallocated = PETSC_TRUE; 1629 /* Note: For now, when data is specified above, this assumes the user correctly 1630 allocates the local dense storage space. We should add error checking. */ 1631 1632 a = (Mat_MPIDense*)mat->data; 1633 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1634 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1635 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1636 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1637 a->nvec = mat->cmap->n; 1638 1639 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1640 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1641 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1642 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1643 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 EXTERN_C_END 1647 1648 /*MC 1649 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1650 1651 run ./configure with the option --download-plapack 1652 1653 1654 Options Database Keys: 1655 . -mat_plapack_nprows <n> - number of rows in processor partition 1656 . -mat_plapack_npcols <n> - number of columns in processor partition 1657 . -mat_plapack_nb <n> - block size of template vector 1658 . -mat_plapack_nb_alg <n> - algorithmic block size 1659 - -mat_plapack_ckerror <n> - error checking flag 1660 1661 Level: intermediate 1662 1663 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1664 1665 M*/ 1666 1667 EXTERN_C_BEGIN 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatCreate_MPIDense" 1670 PetscErrorCode MatCreate_MPIDense(Mat mat) 1671 { 1672 Mat_MPIDense *a; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1677 mat->data = (void*)a; 1678 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1679 1680 mat->insertmode = NOT_SET_VALUES; 1681 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1682 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1683 1684 /* build cache for off array entries formed */ 1685 a->donotstash = PETSC_FALSE; 1686 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1687 1688 /* stuff used for matrix vector multiply */ 1689 a->lvec = 0; 1690 a->Mvctx = 0; 1691 a->roworiented = PETSC_TRUE; 1692 1693 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1694 "MatGetDiagonalBlock_MPIDense", 1695 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1696 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1697 "MatMPIDenseSetPreallocation_MPIDense", 1698 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1699 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1700 "MatMatMult_MPIAIJ_MPIDense", 1701 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1703 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1704 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1705 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1706 "MatMatMultNumeric_MPIAIJ_MPIDense", 1707 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1708 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1709 "MatGetFactor_mpidense_petsc", 1710 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1711 #if defined(PETSC_HAVE_PLAPACK) 1712 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1713 "MatGetFactor_mpidense_plapack", 1714 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1715 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1716 #endif 1717 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1718 1719 PetscFunctionReturn(0); 1720 } 1721 EXTERN_C_END 1722 1723 /*MC 1724 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1725 1726 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1727 and MATMPIDENSE otherwise. 1728 1729 Options Database Keys: 1730 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1731 1732 Level: beginner 1733 1734 1735 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1736 M*/ 1737 1738 EXTERN_C_BEGIN 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatCreate_Dense" 1741 PetscErrorCode MatCreate_Dense(Mat A) 1742 { 1743 PetscErrorCode ierr; 1744 PetscMPIInt size; 1745 1746 PetscFunctionBegin; 1747 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1748 if (size == 1) { 1749 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1750 } else { 1751 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1752 } 1753 PetscFunctionReturn(0); 1754 } 1755 EXTERN_C_END 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1759 /*@C 1760 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1761 1762 Not collective 1763 1764 Input Parameters: 1765 . A - the matrix 1766 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1767 to control all matrix memory allocation. 1768 1769 Notes: 1770 The dense format is fully compatible with standard Fortran 77 1771 storage by columns. 1772 1773 The data input variable is intended primarily for Fortran programmers 1774 who wish to allocate their own matrix memory space. Most users should 1775 set data=PETSC_NULL. 1776 1777 Level: intermediate 1778 1779 .keywords: matrix,dense, parallel 1780 1781 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1782 @*/ 1783 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1784 { 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatCreateMPIDense" 1794 /*@C 1795 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1796 1797 Collective on MPI_Comm 1798 1799 Input Parameters: 1800 + comm - MPI communicator 1801 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1802 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1803 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1804 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1805 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1806 to control all matrix memory allocation. 1807 1808 Output Parameter: 1809 . A - the matrix 1810 1811 Notes: 1812 The dense format is fully compatible with standard Fortran 77 1813 storage by columns. 1814 1815 The data input variable is intended primarily for Fortran programmers 1816 who wish to allocate their own matrix memory space. Most users should 1817 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1818 1819 The user MUST specify either the local or global matrix dimensions 1820 (possibly both). 1821 1822 Level: intermediate 1823 1824 .keywords: matrix,dense, parallel 1825 1826 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1827 @*/ 1828 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1829 { 1830 PetscErrorCode ierr; 1831 PetscMPIInt size; 1832 1833 PetscFunctionBegin; 1834 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1835 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1836 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1837 if (size > 1) { 1838 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1839 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1840 } else { 1841 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1842 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1843 } 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatDuplicate_MPIDense" 1849 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1850 { 1851 Mat mat; 1852 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1853 PetscErrorCode ierr; 1854 1855 PetscFunctionBegin; 1856 *newmat = 0; 1857 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1858 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1859 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1860 a = (Mat_MPIDense*)mat->data; 1861 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1862 1863 mat->factortype = A->factortype; 1864 mat->assembled = PETSC_TRUE; 1865 mat->preallocated = PETSC_TRUE; 1866 1867 a->size = oldmat->size; 1868 a->rank = oldmat->rank; 1869 mat->insertmode = NOT_SET_VALUES; 1870 a->nvec = oldmat->nvec; 1871 a->donotstash = oldmat->donotstash; 1872 1873 ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr); 1874 ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr); 1875 1876 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1877 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1878 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1879 1880 *newmat = mat; 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1886 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1887 { 1888 PetscErrorCode ierr; 1889 PetscMPIInt rank,size; 1890 PetscInt *rowners,i,m,nz,j; 1891 PetscScalar *array,*vals,*vals_ptr; 1892 MPI_Status status; 1893 1894 PetscFunctionBegin; 1895 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1896 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1897 1898 /* determine ownership of all rows */ 1899 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1900 else m = newmat->rmap->n; 1901 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1902 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1903 rowners[0] = 0; 1904 for (i=2; i<=size; i++) { 1905 rowners[i] += rowners[i-1]; 1906 } 1907 1908 if (!sizesset) { 1909 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1910 } 1911 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1912 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1913 1914 if (!rank) { 1915 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1916 1917 /* read in my part of the matrix numerical values */ 1918 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1919 1920 /* insert into matrix-by row (this is why cannot directly read into array */ 1921 vals_ptr = vals; 1922 for (i=0; i<m; i++) { 1923 for (j=0; j<N; j++) { 1924 array[i + j*m] = *vals_ptr++; 1925 } 1926 } 1927 1928 /* read in other processors and ship out */ 1929 for (i=1; i<size; i++) { 1930 nz = (rowners[i+1] - rowners[i])*N; 1931 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1932 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1933 } 1934 } else { 1935 /* receive numeric values */ 1936 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1937 1938 /* receive message of values*/ 1939 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 1940 1941 /* insert into matrix-by row (this is why cannot directly read into array */ 1942 vals_ptr = vals; 1943 for (i=0; i<m; i++) { 1944 for (j=0; j<N; j++) { 1945 array[i + j*m] = *vals_ptr++; 1946 } 1947 } 1948 } 1949 ierr = PetscFree(rowners);CHKERRQ(ierr); 1950 ierr = PetscFree(vals);CHKERRQ(ierr); 1951 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1952 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatLoad_MPIDense" 1958 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1959 { 1960 PetscScalar *vals,*svals; 1961 MPI_Comm comm = ((PetscObject)viewer)->comm; 1962 MPI_Status status; 1963 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1964 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1965 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1966 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1967 int fd; 1968 PetscErrorCode ierr; 1969 1970 PetscFunctionBegin; 1971 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1972 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1973 if (!rank) { 1974 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1975 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1976 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1977 } 1978 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1979 1980 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1981 M = header[1]; N = header[2]; nz = header[3]; 1982 1983 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1984 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1985 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1986 1987 /* If global sizes are set, check if they are consistent with that given in the file */ 1988 if (sizesset) { 1989 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1990 } 1991 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1992 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1993 1994 /* 1995 Handle case where matrix is stored on disk as a dense matrix 1996 */ 1997 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1998 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 /* determine ownership of all rows */ 2003 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 2004 else m = PetscMPIIntCast(newmat->rmap->n); 2005 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2006 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2007 rowners[0] = 0; 2008 for (i=2; i<=size; i++) { 2009 rowners[i] += rowners[i-1]; 2010 } 2011 rstart = rowners[rank]; 2012 rend = rowners[rank+1]; 2013 2014 /* distribute row lengths to all processors */ 2015 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2016 if (!rank) { 2017 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2018 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2019 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2020 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2021 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2022 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2023 } else { 2024 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2025 } 2026 2027 if (!rank) { 2028 /* calculate the number of nonzeros on each processor */ 2029 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2030 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2031 for (i=0; i<size; i++) { 2032 for (j=rowners[i]; j< rowners[i+1]; j++) { 2033 procsnz[i] += rowlengths[j]; 2034 } 2035 } 2036 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2037 2038 /* determine max buffer needed and allocate it */ 2039 maxnz = 0; 2040 for (i=0; i<size; i++) { 2041 maxnz = PetscMax(maxnz,procsnz[i]); 2042 } 2043 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2044 2045 /* read in my part of the matrix column indices */ 2046 nz = procsnz[0]; 2047 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2048 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2049 2050 /* read in every one elses and ship off */ 2051 for (i=1; i<size; i++) { 2052 nz = procsnz[i]; 2053 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2054 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2055 } 2056 ierr = PetscFree(cols);CHKERRQ(ierr); 2057 } else { 2058 /* determine buffer space needed for message */ 2059 nz = 0; 2060 for (i=0; i<m; i++) { 2061 nz += ourlens[i]; 2062 } 2063 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2064 2065 /* receive message of column indices*/ 2066 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2067 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2068 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2069 } 2070 2071 /* loop over local rows, determining number of off diagonal entries */ 2072 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2073 jj = 0; 2074 for (i=0; i<m; i++) { 2075 for (j=0; j<ourlens[i]; j++) { 2076 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2077 jj++; 2078 } 2079 } 2080 2081 /* create our matrix */ 2082 for (i=0; i<m; i++) { 2083 ourlens[i] -= offlens[i]; 2084 } 2085 2086 if (!sizesset) { 2087 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2088 } 2089 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2090 for (i=0; i<m; i++) { 2091 ourlens[i] += offlens[i]; 2092 } 2093 2094 if (!rank) { 2095 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2096 2097 /* read in my part of the matrix numerical values */ 2098 nz = procsnz[0]; 2099 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 2112 /* read in other processors and ship out */ 2113 for (i=1; i<size; i++) { 2114 nz = procsnz[i]; 2115 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2116 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2117 } 2118 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2119 } else { 2120 /* receive numeric values */ 2121 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2122 2123 /* receive message of values*/ 2124 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2125 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2126 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2127 2128 /* insert into matrix */ 2129 jj = rstart; 2130 smycols = mycols; 2131 svals = vals; 2132 for (i=0; i<m; i++) { 2133 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2134 smycols += ourlens[i]; 2135 svals += ourlens[i]; 2136 jj++; 2137 } 2138 } 2139 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2140 ierr = PetscFree(vals);CHKERRQ(ierr); 2141 ierr = PetscFree(mycols);CHKERRQ(ierr); 2142 ierr = PetscFree(rowners);CHKERRQ(ierr); 2143 2144 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatEqual_MPIDense" 2151 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2152 { 2153 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2154 Mat a,b; 2155 PetscBool flg; 2156 PetscErrorCode ierr; 2157 2158 PetscFunctionBegin; 2159 a = matA->A; 2160 b = matB->A; 2161 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2162 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #if defined(PETSC_HAVE_PLAPACK) 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2170 /*@C 2171 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2172 Level: developer 2173 2174 .keywords: Petsc, destroy, package, PLAPACK 2175 .seealso: PetscFinalize() 2176 @*/ 2177 PetscErrorCode PetscPLAPACKFinalizePackage(void) 2178 { 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 ierr = PLA_Finalize();CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2188 /*@C 2189 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2190 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2191 2192 Input Parameter: 2193 . comm - the communicator the matrix lives on 2194 2195 Level: developer 2196 2197 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2198 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2199 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2200 cannot overlap. 2201 2202 .keywords: Petsc, initialize, package, PLAPACK 2203 .seealso: PetscSysInitializePackage(), PetscInitialize() 2204 @*/ 2205 PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm) 2206 { 2207 PetscMPIInt size; 2208 PetscErrorCode ierr; 2209 2210 PetscFunctionBegin; 2211 if (!PLA_Initialized(PETSC_NULL)) { 2212 2213 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2214 Plapack_nprows = 1; 2215 Plapack_npcols = size; 2216 2217 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2218 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2219 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2220 #if defined(PETSC_USE_DEBUG) 2221 Plapack_ierror = 3; 2222 #else 2223 Plapack_ierror = 0; 2224 #endif 2225 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2226 if (Plapack_ierror){ 2227 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2228 } else { 2229 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2230 } 2231 2232 Plapack_nb_alg = 0; 2233 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2234 if (Plapack_nb_alg) { 2235 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2236 } 2237 PetscOptionsEnd(); 2238 2239 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2240 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2241 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 #endif 2247