1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #if defined(PETSC_HAVE_PLAPACK) 9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 10 static MPI_Comm Plapack_comm_2d; 11 EXTERN_C_BEGIN 12 #include <PLA.h> 13 EXTERN_C_END 14 15 typedef struct { 16 PLA_Obj A,pivots; 17 PLA_Template templ; 18 MPI_Datatype datatype; 19 PetscInt nb,rstart; 20 VecScatter ctx; 21 IS is_pla,is_petsc; 22 PetscBool pla_solved; 23 MatStructure mstruct; 24 } Mat_Plapack; 25 #endif 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatDenseGetLocalMatrix" 29 /*@ 30 31 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 32 matrix that represents the operator. For sequential matrices it returns itself. 33 34 Input Parameter: 35 . A - the Seq or MPI dense matrix 36 37 Output Parameter: 38 . B - the inner matrix 39 40 Level: intermediate 41 42 @*/ 43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscBool flg; 48 49 PetscFunctionBegin; 50 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 51 if (flg) { 52 *B = mat->A; 53 } else { 54 *B = A; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatGetRow_MPIDense" 61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 62 { 63 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 66 67 PetscFunctionBegin; 68 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 69 lrow = row - rstart; 70 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatRestoreRow_MPIDense" 76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 82 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 83 PetscFunctionReturn(0); 84 } 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 89 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *B) 90 { 91 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 92 PetscErrorCode ierr; 93 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 94 PetscScalar *array; 95 MPI_Comm comm; 96 97 PetscFunctionBegin; 98 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 99 100 /* The reuse aspect is not implemented efficiently */ 101 if (reuse) { ierr = MatDestroy(B);CHKERRQ(ierr);} 102 103 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 104 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 105 ierr = MatCreate(comm,B);CHKERRQ(ierr); 106 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 107 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 108 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 109 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 110 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 111 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 113 *iscopy = PETSC_TRUE; 114 PetscFunctionReturn(0); 115 } 116 EXTERN_C_END 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPIDense" 120 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 123 PetscErrorCode ierr; 124 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 125 PetscBool roworiented = A->roworiented; 126 127 PetscFunctionBegin; 128 if (v) PetscValidScalarPointer(v,6); 129 for (i=0; i<m; i++) { 130 if (idxm[i] < 0) continue; 131 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 132 if (idxm[i] >= rstart && idxm[i] < rend) { 133 row = idxm[i] - rstart; 134 if (roworiented) { 135 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 136 } else { 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 141 } 142 } 143 } else { 144 if (!A->donotstash) { 145 if (roworiented) { 146 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 147 } else { 148 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 149 } 150 } 151 } 152 } 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatGetValues_MPIDense" 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscErrorCode ierr; 162 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 163 164 PetscFunctionBegin; 165 for (i=0; i<m; i++) { 166 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 167 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 168 if (idxm[i] >= rstart && idxm[i] < rend) { 169 row = idxm[i] - rstart; 170 for (j=0; j<n; j++) { 171 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 172 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 173 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 174 } 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatGetArray_MPIDense" 182 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 183 { 184 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 185 PetscErrorCode ierr; 186 187 PetscFunctionBegin; 188 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 194 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 195 { 196 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 197 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 198 PetscErrorCode ierr; 199 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 200 const PetscInt *irow,*icol; 201 PetscScalar *av,*bv,*v = lmat->v; 202 Mat newmat; 203 IS iscol_local; 204 205 PetscFunctionBegin; 206 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 207 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 208 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 209 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 211 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 212 213 /* No parallel redistribution currently supported! Should really check each index set 214 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 215 original matrix! */ 216 217 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 218 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 219 220 /* Check submatrix call */ 221 if (scall == MAT_REUSE_MATRIX) { 222 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 223 /* Really need to test rows and column sizes! */ 224 newmat = *B; 225 } else { 226 /* Create and fill new matrix */ 227 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 228 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 229 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 230 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 231 } 232 233 /* Now extract the data pointers and do the copy, column at a time */ 234 newmatd = (Mat_MPIDense*)newmat->data; 235 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 236 237 for (i=0; i<Ncols; i++) { 238 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 239 for (j=0; j<nrows; j++) { 240 *bv++ = av[irow[j] - rstart]; 241 } 242 } 243 244 /* Assemble the matrices so that the correct flags are set */ 245 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 246 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 248 /* Free work space */ 249 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 250 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 251 *B = newmat; 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatRestoreArray_MPIDense" 257 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 258 { 259 PetscFunctionBegin; 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 265 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 266 { 267 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 268 MPI_Comm comm = ((PetscObject)mat)->comm; 269 PetscErrorCode ierr; 270 PetscInt nstash,reallocs; 271 InsertMode addv; 272 273 PetscFunctionBegin; 274 /* make sure all processors are either in INSERTMODE or ADDMODE */ 275 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 276 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 277 mat->insertmode = addv; /* in case this processor had no cache */ 278 279 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 280 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 281 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 287 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 288 { 289 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 290 PetscErrorCode ierr; 291 PetscInt i,*row,*col,flg,j,rstart,ncols; 292 PetscMPIInt n; 293 PetscScalar *val; 294 InsertMode addv=mat->insertmode; 295 296 PetscFunctionBegin; 297 /* wait on receives */ 298 while (1) { 299 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 300 if (!flg) break; 301 302 for (i=0; i<n;) { 303 /* Now identify the consecutive vals belonging to the same row */ 304 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 305 if (j < n) ncols = j-i; 306 else ncols = n-i; 307 /* Now assemble all these values with a single function call */ 308 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 309 i = j; 310 } 311 } 312 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 313 314 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 315 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 316 317 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 318 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatZeroEntries_MPIDense" 325 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 326 { 327 PetscErrorCode ierr; 328 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 329 330 PetscFunctionBegin; 331 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 /* the code does not do the diagonal entries correctly unless the 336 matrix is square and the column and row owerships are identical. 337 This is a BUG. The only way to fix it seems to be to access 338 mdn->A and mdn->B directly and not through the MatZeroRows() 339 routine. 340 */ 341 #undef __FUNCT__ 342 #define __FUNCT__ "MatZeroRows_MPIDense" 343 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 344 { 345 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 346 PetscErrorCode ierr; 347 PetscInt i,*owners = A->rmap->range; 348 PetscInt *nprocs,j,idx,nsends; 349 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 350 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 351 PetscInt *lens,*lrows,*values; 352 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 353 MPI_Comm comm = ((PetscObject)A)->comm; 354 MPI_Request *send_waits,*recv_waits; 355 MPI_Status recv_status,*send_status; 356 PetscBool found; 357 const PetscScalar *xx; 358 PetscScalar *bb; 359 360 PetscFunctionBegin; 361 /* first count number of contributors to each processor */ 362 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 363 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 364 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 365 for (i=0; i<N; i++) { 366 idx = rows[i]; 367 found = PETSC_FALSE; 368 for (j=0; j<size; j++) { 369 if (idx >= owners[j] && idx < owners[j+1]) { 370 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 371 } 372 } 373 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 374 } 375 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 376 377 /* inform other processors of number of messages and max length*/ 378 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 379 380 /* post receives: */ 381 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 382 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 383 for (i=0; i<nrecvs; i++) { 384 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 385 } 386 387 /* do sends: 388 1) starts[i] gives the starting index in svalues for stuff going to 389 the ith processor 390 */ 391 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 392 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 393 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 394 starts[0] = 0; 395 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 396 for (i=0; i<N; i++) { 397 svalues[starts[owner[i]]++] = rows[i]; 398 } 399 400 starts[0] = 0; 401 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 402 count = 0; 403 for (i=0; i<size; i++) { 404 if (nprocs[2*i+1]) { 405 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 406 } 407 } 408 ierr = PetscFree(starts);CHKERRQ(ierr); 409 410 base = owners[rank]; 411 412 /* wait on receives */ 413 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 414 count = nrecvs; 415 slen = 0; 416 while (count) { 417 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 418 /* unpack receives into our local space */ 419 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 420 source[imdex] = recv_status.MPI_SOURCE; 421 lens[imdex] = n; 422 slen += n; 423 count--; 424 } 425 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 426 427 /* move the data into the send scatter */ 428 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 429 count = 0; 430 for (i=0; i<nrecvs; i++) { 431 values = rvalues + i*nmax; 432 for (j=0; j<lens[i]; j++) { 433 lrows[count++] = values[j] - base; 434 } 435 } 436 ierr = PetscFree(rvalues);CHKERRQ(ierr); 437 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 438 ierr = PetscFree(owner);CHKERRQ(ierr); 439 ierr = PetscFree(nprocs);CHKERRQ(ierr); 440 441 /* fix right hand side if needed */ 442 if (x && b) { 443 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 444 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 445 for (i=0; i<slen; i++) { 446 bb[lrows[i]] = diag*xx[lrows[i]]; 447 } 448 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 449 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 450 } 451 452 /* actually zap the local rows */ 453 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 454 ierr = PetscFree(lrows);CHKERRQ(ierr); 455 456 /* wait on sends */ 457 if (nsends) { 458 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 459 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 460 ierr = PetscFree(send_status);CHKERRQ(ierr); 461 } 462 ierr = PetscFree(send_waits);CHKERRQ(ierr); 463 ierr = PetscFree(svalues);CHKERRQ(ierr); 464 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatMult_MPIDense" 470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 471 { 472 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 473 PetscErrorCode ierr; 474 475 PetscFunctionBegin; 476 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatMultAdd_MPIDense" 484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatMultTranspose_MPIDense" 498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 499 { 500 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 501 PetscErrorCode ierr; 502 PetscScalar zero = 0.0; 503 504 PetscFunctionBegin; 505 ierr = VecSet(yy,zero);CHKERRQ(ierr); 506 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 507 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 508 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 PetscErrorCode ierr; 518 519 PetscFunctionBegin; 520 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 521 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 522 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 523 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatGetDiagonal_MPIDense" 529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 532 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 533 PetscErrorCode ierr; 534 PetscInt len,i,n,m = A->rmap->n,radd; 535 PetscScalar *x,zero = 0.0; 536 537 PetscFunctionBegin; 538 ierr = VecSet(v,zero);CHKERRQ(ierr); 539 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 540 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 541 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 542 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 543 radd = A->rmap->rstart*m; 544 for (i=0; i<len; i++) { 545 x[i] = aloc->v[radd + i*m + i]; 546 } 547 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatDestroy_MPIDense" 553 PetscErrorCode MatDestroy_MPIDense(Mat mat) 554 { 555 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 556 PetscErrorCode ierr; 557 #if defined(PETSC_HAVE_PLAPACK) 558 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 559 #endif 560 561 PetscFunctionBegin; 562 563 #if defined(PETSC_USE_LOG) 564 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 565 #endif 566 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 567 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 568 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 569 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 570 #if defined(PETSC_HAVE_PLAPACK) 571 if (lu) { 572 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 573 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 574 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 575 576 if (lu->is_pla) { 577 ierr = ISDestroy(&lu->is_pla);CHKERRQ(ierr); 578 ierr = ISDestroy(&lu->is_petsc);CHKERRQ(ierr); 579 ierr = VecScatterDestroy(&lu->ctx);CHKERRQ(ierr); 580 } 581 } 582 #endif 583 584 ierr = PetscFree(mdn);CHKERRQ(ierr); 585 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatView_MPIDense_Binary" 596 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 597 { 598 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 599 PetscErrorCode ierr; 600 PetscViewerFormat format; 601 int fd; 602 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 603 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 604 PetscScalar *work,*v,*vv; 605 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 606 MPI_Status status; 607 608 PetscFunctionBegin; 609 if (mdn->size == 1) { 610 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 611 } else { 612 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 613 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 614 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 615 616 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 617 if (format == PETSC_VIEWER_NATIVE) { 618 619 if (!rank) { 620 /* store the matrix as a dense matrix */ 621 header[0] = MAT_FILE_CLASSID; 622 header[1] = mat->rmap->N; 623 header[2] = N; 624 header[3] = MATRIX_BINARY_FORMAT_DENSE; 625 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 626 627 /* get largest work array needed for transposing array */ 628 mmax = mat->rmap->n; 629 for (i=1; i<size; i++) { 630 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 631 } 632 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 633 634 /* write out local array, by rows */ 635 m = mat->rmap->n; 636 v = a->v; 637 for (j=0; j<N; j++) { 638 for (i=0; i<m; i++) { 639 work[j + i*N] = *v++; 640 } 641 } 642 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 643 /* get largest work array to receive messages from other processes, excludes process zero */ 644 mmax = 0; 645 for (i=1; i<size; i++) { 646 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 647 } 648 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 649 for(k = 1; k < size; k++) { 650 v = vv; 651 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 652 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 653 654 for(j = 0; j < N; j++) { 655 for(i = 0; i < m; i++) { 656 work[j + i*N] = *v++; 657 } 658 } 659 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 660 } 661 ierr = PetscFree(work);CHKERRQ(ierr); 662 ierr = PetscFree(vv);CHKERRQ(ierr); 663 } else { 664 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 665 } 666 } else { 667 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 668 } 669 } 670 PetscFunctionReturn(0); 671 } 672 673 #undef __FUNCT__ 674 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 675 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 676 { 677 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 678 PetscErrorCode ierr; 679 PetscMPIInt size = mdn->size,rank = mdn->rank; 680 const PetscViewerType vtype; 681 PetscBool iascii,isdraw; 682 PetscViewer sviewer; 683 PetscViewerFormat format; 684 #if defined(PETSC_HAVE_PLAPACK) 685 Mat_Plapack *lu; 686 #endif 687 688 PetscFunctionBegin; 689 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 690 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 691 if (iascii) { 692 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 693 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 694 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 695 MatInfo info; 696 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 698 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 699 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 700 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 701 #if defined(PETSC_HAVE_PLAPACK) 702 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 703 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 704 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 705 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 706 if (mat->factortype){ 707 lu=(Mat_Plapack*)(mat->spptr); 708 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 709 } 710 #else 711 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 712 #endif 713 PetscFunctionReturn(0); 714 } else if (format == PETSC_VIEWER_ASCII_INFO) { 715 PetscFunctionReturn(0); 716 } 717 } else if (isdraw) { 718 PetscDraw draw; 719 PetscBool isnull; 720 721 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 722 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 723 if (isnull) PetscFunctionReturn(0); 724 } 725 726 if (size == 1) { 727 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 728 } else { 729 /* assemble the entire matrix onto first processor. */ 730 Mat A; 731 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 732 PetscInt *cols; 733 PetscScalar *vals; 734 735 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 736 if (!rank) { 737 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 738 } else { 739 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 740 } 741 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 742 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 743 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 744 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 745 746 /* Copy the matrix ... This isn't the most efficient means, 747 but it's quick for now */ 748 A->insertmode = INSERT_VALUES; 749 row = mat->rmap->rstart; m = mdn->A->rmap->n; 750 for (i=0; i<m; i++) { 751 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 752 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 753 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 754 row++; 755 } 756 757 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 758 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 759 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 760 if (!rank) { 761 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 762 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 763 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 764 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 765 } 766 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 767 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 768 ierr = MatDestroy(&A);CHKERRQ(ierr); 769 } 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MPIDense" 775 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 776 { 777 PetscErrorCode ierr; 778 PetscBool iascii,isbinary,isdraw,issocket; 779 780 PetscFunctionBegin; 781 782 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 783 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 786 787 if (iascii || issocket || isdraw) { 788 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 789 } else if (isbinary) { 790 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 791 } else { 792 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 793 } 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatGetInfo_MPIDense" 799 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 800 { 801 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 802 Mat mdn = mat->A; 803 PetscErrorCode ierr; 804 PetscReal isend[5],irecv[5]; 805 806 PetscFunctionBegin; 807 info->block_size = 1.0; 808 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 809 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 810 isend[3] = info->memory; isend[4] = info->mallocs; 811 if (flag == MAT_LOCAL) { 812 info->nz_used = isend[0]; 813 info->nz_allocated = isend[1]; 814 info->nz_unneeded = isend[2]; 815 info->memory = isend[3]; 816 info->mallocs = isend[4]; 817 } else if (flag == MAT_GLOBAL_MAX) { 818 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 819 info->nz_used = irecv[0]; 820 info->nz_allocated = irecv[1]; 821 info->nz_unneeded = irecv[2]; 822 info->memory = irecv[3]; 823 info->mallocs = irecv[4]; 824 } else if (flag == MAT_GLOBAL_SUM) { 825 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 826 info->nz_used = irecv[0]; 827 info->nz_allocated = irecv[1]; 828 info->nz_unneeded = irecv[2]; 829 info->memory = irecv[3]; 830 info->mallocs = irecv[4]; 831 } 832 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 833 info->fill_ratio_needed = 0; 834 info->factor_mallocs = 0; 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatSetOption_MPIDense" 840 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 841 { 842 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 843 PetscErrorCode ierr; 844 845 PetscFunctionBegin; 846 switch (op) { 847 case MAT_NEW_NONZERO_LOCATIONS: 848 case MAT_NEW_NONZERO_LOCATION_ERR: 849 case MAT_NEW_NONZERO_ALLOCATION_ERR: 850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 851 break; 852 case MAT_ROW_ORIENTED: 853 a->roworiented = flg; 854 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 855 break; 856 case MAT_NEW_DIAGONALS: 857 case MAT_USE_HASH_TABLE: 858 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 859 break; 860 case MAT_IGNORE_OFF_PROC_ENTRIES: 861 a->donotstash = flg; 862 break; 863 case MAT_SYMMETRIC: 864 case MAT_STRUCTURALLY_SYMMETRIC: 865 case MAT_HERMITIAN: 866 case MAT_SYMMETRY_ETERNAL: 867 case MAT_IGNORE_LOWER_TRIANGULAR: 868 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 869 break; 870 default: 871 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatDiagonalScale_MPIDense" 879 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 880 { 881 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 882 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 883 PetscScalar *l,*r,x,*v; 884 PetscErrorCode ierr; 885 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 886 887 PetscFunctionBegin; 888 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 889 if (ll) { 890 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 891 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 892 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 893 for (i=0; i<m; i++) { 894 x = l[i]; 895 v = mat->v + i; 896 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 897 } 898 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 899 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 900 } 901 if (rr) { 902 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 903 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 904 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 907 for (i=0; i<n; i++) { 908 x = r[i]; 909 v = mat->v + i*m; 910 for (j=0; j<m; j++) { (*v++) *= x;} 911 } 912 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 913 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 914 } 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatNorm_MPIDense" 920 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 921 { 922 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 923 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 924 PetscErrorCode ierr; 925 PetscInt i,j; 926 PetscReal sum = 0.0; 927 PetscScalar *v = mat->v; 928 929 PetscFunctionBegin; 930 if (mdn->size == 1) { 931 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 932 } else { 933 if (type == NORM_FROBENIUS) { 934 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 935 #if defined(PETSC_USE_COMPLEX) 936 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 937 #else 938 sum += (*v)*(*v); v++; 939 #endif 940 } 941 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 942 *nrm = sqrt(*nrm); 943 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 944 } else if (type == NORM_1) { 945 PetscReal *tmp,*tmp2; 946 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 947 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 948 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 949 *nrm = 0.0; 950 v = mat->v; 951 for (j=0; j<mdn->A->cmap->n; j++) { 952 for (i=0; i<mdn->A->rmap->n; i++) { 953 tmp[j] += PetscAbsScalar(*v); v++; 954 } 955 } 956 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 957 for (j=0; j<A->cmap->N; j++) { 958 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 959 } 960 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 961 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 962 } else if (type == NORM_INFINITY) { /* max row norm */ 963 PetscReal ntemp; 964 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 965 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 966 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatTranspose_MPIDense" 973 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 974 { 975 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 976 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 977 Mat B; 978 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 979 PetscErrorCode ierr; 980 PetscInt j,i; 981 PetscScalar *v; 982 983 PetscFunctionBegin; 984 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 985 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 986 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 987 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 988 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 989 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 990 } else { 991 B = *matout; 992 } 993 994 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 995 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 996 for (i=0; i<m; i++) rwork[i] = rstart + i; 997 for (j=0; j<n; j++) { 998 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 999 v += m; 1000 } 1001 ierr = PetscFree(rwork);CHKERRQ(ierr); 1002 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1005 *matout = B; 1006 } else { 1007 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1014 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1018 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1019 { 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #if defined(PETSC_HAVE_PLAPACK) 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1031 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1032 { 1033 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1034 PetscErrorCode ierr; 1035 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1036 PetscScalar *array; 1037 PetscReal one = 1.0; 1038 1039 PetscFunctionBegin; 1040 /* Copy A into F->lu->A */ 1041 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1042 ierr = PLA_API_begin();CHKERRQ(ierr); 1043 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1044 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1045 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1046 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1047 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1048 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1049 ierr = PLA_API_end();CHKERRQ(ierr); 1050 lu->rstart = rstart; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1056 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1057 { 1058 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1059 PetscErrorCode ierr; 1060 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1061 PetscScalar *array; 1062 PetscReal one = 1.0; 1063 1064 PetscFunctionBegin; 1065 /* Copy F into A->lu->A */ 1066 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1067 ierr = PLA_API_begin();CHKERRQ(ierr); 1068 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1069 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1070 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1071 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1072 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1073 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1074 ierr = PLA_API_end();CHKERRQ(ierr); 1075 lu->rstart = rstart; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1081 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1082 { 1083 PetscErrorCode ierr; 1084 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1085 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1086 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1087 PLA_Obj alpha = NULL,beta = NULL; 1088 1089 PetscFunctionBegin; 1090 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1091 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1092 1093 /* 1094 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1095 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1096 */ 1097 1098 /* do the multiply in PLA */ 1099 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1100 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1101 CHKMEMQ; 1102 1103 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1104 CHKMEMQ; 1105 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1106 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1107 1108 /* 1109 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1110 */ 1111 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1117 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1118 { 1119 PetscErrorCode ierr; 1120 PetscInt m=A->rmap->n,n=B->cmap->n; 1121 Mat Cmat; 1122 1123 PetscFunctionBegin; 1124 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1125 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1126 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1127 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1128 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1129 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1130 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1131 1132 *C = Cmat; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1138 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 if (scall == MAT_INITIAL_MATRIX){ 1144 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1145 } 1146 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatSolve_MPIDense" 1152 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1153 { 1154 MPI_Comm comm = ((PetscObject)A)->comm; 1155 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1156 PetscErrorCode ierr; 1157 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1158 PetscScalar *array; 1159 PetscReal one = 1.0; 1160 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1161 PLA_Obj v_pla = NULL; 1162 PetscScalar *loc_buf; 1163 Vec loc_x; 1164 1165 PetscFunctionBegin; 1166 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1167 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1168 1169 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1170 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1171 PLA_Obj_set_to_zero(v_pla); 1172 1173 /* Copy b into rhs_pla */ 1174 PLA_API_begin(); 1175 PLA_Obj_API_open(v_pla); 1176 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1177 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1178 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1179 PLA_Obj_API_close(v_pla); 1180 PLA_API_end(); 1181 1182 if (A->factortype == MAT_FACTOR_LU){ 1183 /* Apply the permutations to the right hand sides */ 1184 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1185 1186 /* Solve L y = b, overwriting b with y */ 1187 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1188 1189 /* Solve U x = y (=b), overwriting b with x */ 1190 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1191 } else { /* MAT_FACTOR_CHOLESKY */ 1192 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1193 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1194 PLA_NONUNIT_DIAG,lu->A,v_pla); 1195 } 1196 1197 /* Copy PLAPACK x into Petsc vector x */ 1198 PLA_Obj_local_length(v_pla, &loc_m); 1199 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1200 PLA_Obj_local_stride(v_pla, &loc_stride); 1201 /* 1202 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1203 */ 1204 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1205 if (!lu->pla_solved){ 1206 1207 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1208 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1209 1210 /* Create IS and cts for VecScatterring */ 1211 PLA_Obj_local_length(v_pla, &loc_m); 1212 PLA_Obj_local_stride(v_pla, &loc_stride); 1213 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1214 1215 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1216 for (i=0; i<loc_m; i+=lu->nb){ 1217 j = 0; 1218 while (j < lu->nb && i+j < loc_m){ 1219 idx_petsc[i+j] = rstart + j; j++; 1220 } 1221 rstart += size*lu->nb; 1222 } 1223 1224 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1225 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr); 1227 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr); 1228 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1229 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1230 } 1231 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 1234 /* Free data */ 1235 ierr = VecDestroy(&loc_x);CHKERRQ(ierr); 1236 PLA_Obj_free(&v_pla); 1237 1238 lu->pla_solved = PETSC_TRUE; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1244 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1245 { 1246 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1247 PetscErrorCode ierr; 1248 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1249 PetscInt info_pla=0; 1250 PetscScalar *array,one = 1.0; 1251 1252 PetscFunctionBegin; 1253 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1254 PLA_Obj_free(&lu->A); 1255 PLA_Obj_free (&lu->pivots); 1256 } 1257 /* Create PLAPACK matrix object */ 1258 lu->A = NULL; lu->pivots = NULL; 1259 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1260 PLA_Obj_set_to_zero(lu->A); 1261 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1262 1263 /* Copy A into lu->A */ 1264 PLA_API_begin(); 1265 PLA_Obj_API_open(lu->A); 1266 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1267 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1268 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1269 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1270 PLA_Obj_API_close(lu->A); 1271 PLA_API_end(); 1272 1273 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1274 info_pla = PLA_LU(lu->A,lu->pivots); 1275 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1276 1277 lu->rstart = rstart; 1278 lu->mstruct = SAME_NONZERO_PATTERN; 1279 F->ops->solve = MatSolve_MPIDense; 1280 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1286 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1287 { 1288 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1289 PetscErrorCode ierr; 1290 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1291 PetscInt info_pla=0; 1292 PetscScalar *array,one = 1.0; 1293 1294 PetscFunctionBegin; 1295 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1296 PLA_Obj_free(&lu->A); 1297 } 1298 /* Create PLAPACK matrix object */ 1299 lu->A = NULL; 1300 lu->pivots = NULL; 1301 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1302 1303 /* Copy A into lu->A */ 1304 PLA_API_begin(); 1305 PLA_Obj_API_open(lu->A); 1306 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1307 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1308 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1309 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1310 PLA_Obj_API_close(lu->A); 1311 PLA_API_end(); 1312 1313 /* Factor P A -> Chol */ 1314 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1315 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1316 1317 lu->rstart = rstart; 1318 lu->mstruct = SAME_NONZERO_PATTERN; 1319 F->ops->solve = MatSolve_MPIDense; 1320 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1321 PetscFunctionReturn(0); 1322 } 1323 1324 /* Note the Petsc perm permutation is ignored */ 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1327 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1328 { 1329 PetscErrorCode ierr; 1330 PetscBool issymmetric,set; 1331 1332 PetscFunctionBegin; 1333 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1334 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1335 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 /* Note the Petsc r and c permutations are ignored */ 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1342 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1343 { 1344 PetscErrorCode ierr; 1345 PetscInt M = A->rmap->N; 1346 Mat_Plapack *lu; 1347 1348 PetscFunctionBegin; 1349 lu = (Mat_Plapack*)F->spptr; 1350 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1351 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1352 PetscFunctionReturn(0); 1353 } 1354 1355 EXTERN_C_BEGIN 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1358 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1359 { 1360 PetscFunctionBegin; 1361 *type = MATSOLVERPLAPACK; 1362 PetscFunctionReturn(0); 1363 } 1364 EXTERN_C_END 1365 1366 EXTERN_C_BEGIN 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1369 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1370 { 1371 PetscErrorCode ierr; 1372 Mat_Plapack *lu; 1373 PetscMPIInt size; 1374 PetscInt M=A->rmap->N; 1375 1376 PetscFunctionBegin; 1377 /* Create the factorization matrix */ 1378 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1379 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1380 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1381 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1382 (*F)->spptr = (void*)lu; 1383 1384 /* Set default Plapack parameters */ 1385 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1386 lu->nb = M/size; 1387 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1388 1389 /* Set runtime options */ 1390 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1391 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1392 PetscOptionsEnd(); 1393 1394 /* Create object distribution template */ 1395 lu->templ = NULL; 1396 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1397 1398 /* Set the datatype */ 1399 #if defined(PETSC_USE_COMPLEX) 1400 lu->datatype = MPI_DOUBLE_COMPLEX; 1401 #else 1402 lu->datatype = MPI_DOUBLE; 1403 #endif 1404 1405 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1406 1407 1408 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1409 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1410 1411 if (ftype == MAT_FACTOR_LU) { 1412 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1413 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1414 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1415 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1416 (*F)->factortype = ftype; 1417 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 EXTERN_C_END 1421 #endif 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1425 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1426 { 1427 #if defined(PETSC_HAVE_PLAPACK) 1428 PetscErrorCode ierr; 1429 #endif 1430 1431 PetscFunctionBegin; 1432 #if defined(PETSC_HAVE_PLAPACK) 1433 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1434 #else 1435 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1436 #endif 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatAXPY_MPIDense" 1442 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1443 { 1444 PetscErrorCode ierr; 1445 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1446 1447 PetscFunctionBegin; 1448 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatConjugate_MPIDense" 1454 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1455 { 1456 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1457 PetscErrorCode ierr; 1458 1459 PetscFunctionBegin; 1460 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatRealPart_MPIDense" 1466 PetscErrorCode MatRealPart_MPIDense(Mat A) 1467 { 1468 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1469 PetscErrorCode ierr; 1470 1471 PetscFunctionBegin; 1472 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1478 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1479 { 1480 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1481 PetscErrorCode ierr; 1482 1483 PetscFunctionBegin; 1484 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 /* -------------------------------------------------------------------*/ 1489 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1490 MatGetRow_MPIDense, 1491 MatRestoreRow_MPIDense, 1492 MatMult_MPIDense, 1493 /* 4*/ MatMultAdd_MPIDense, 1494 MatMultTranspose_MPIDense, 1495 MatMultTransposeAdd_MPIDense, 1496 0, 1497 0, 1498 0, 1499 /*10*/ 0, 1500 0, 1501 0, 1502 0, 1503 MatTranspose_MPIDense, 1504 /*15*/ MatGetInfo_MPIDense, 1505 MatEqual_MPIDense, 1506 MatGetDiagonal_MPIDense, 1507 MatDiagonalScale_MPIDense, 1508 MatNorm_MPIDense, 1509 /*20*/ MatAssemblyBegin_MPIDense, 1510 MatAssemblyEnd_MPIDense, 1511 MatSetOption_MPIDense, 1512 MatZeroEntries_MPIDense, 1513 /*24*/ MatZeroRows_MPIDense, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /*29*/ MatSetUpPreallocation_MPIDense, 1519 0, 1520 0, 1521 MatGetArray_MPIDense, 1522 MatRestoreArray_MPIDense, 1523 /*34*/ MatDuplicate_MPIDense, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /*39*/ MatAXPY_MPIDense, 1529 MatGetSubMatrices_MPIDense, 1530 0, 1531 MatGetValues_MPIDense, 1532 0, 1533 /*44*/ 0, 1534 MatScale_MPIDense, 1535 0, 1536 0, 1537 0, 1538 /*49*/ 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 /*54*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /*59*/ MatGetSubMatrix_MPIDense, 1549 MatDestroy_MPIDense, 1550 MatView_MPIDense, 1551 0, 1552 0, 1553 /*64*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*69*/ 0, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*74*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*79*/ 0, 1569 0, 1570 0, 1571 0, 1572 /*83*/ MatLoad_MPIDense, 1573 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*89*/ 1579 #if defined(PETSC_HAVE_PLAPACK) 1580 MatMatMult_MPIDense_MPIDense, 1581 MatMatMultSymbolic_MPIDense_MPIDense, 1582 MatMatMultNumeric_MPIDense_MPIDense, 1583 #else 1584 0, 1585 0, 1586 0, 1587 #endif 1588 0, 1589 0, 1590 /*94*/ 0, 1591 0, 1592 0, 1593 0, 1594 0, 1595 /*99*/ 0, 1596 0, 1597 0, 1598 MatConjugate_MPIDense, 1599 0, 1600 /*104*/0, 1601 MatRealPart_MPIDense, 1602 MatImaginaryPart_MPIDense, 1603 0, 1604 0, 1605 /*109*/0, 1606 0, 1607 0, 1608 0, 1609 0, 1610 /*114*/0, 1611 0, 1612 0, 1613 0, 1614 0, 1615 /*119*/0, 1616 0, 1617 0, 1618 0 1619 }; 1620 1621 EXTERN_C_BEGIN 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1624 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1625 { 1626 Mat_MPIDense *a; 1627 PetscErrorCode ierr; 1628 1629 PetscFunctionBegin; 1630 mat->preallocated = PETSC_TRUE; 1631 /* Note: For now, when data is specified above, this assumes the user correctly 1632 allocates the local dense storage space. We should add error checking. */ 1633 1634 a = (Mat_MPIDense*)mat->data; 1635 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1636 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1637 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1638 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1639 a->nvec = mat->cmap->n; 1640 1641 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1642 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1643 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1644 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1645 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1646 PetscFunctionReturn(0); 1647 } 1648 EXTERN_C_END 1649 1650 /*MC 1651 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1652 1653 run ./configure with the option --download-plapack 1654 1655 1656 Options Database Keys: 1657 . -mat_plapack_nprows <n> - number of rows in processor partition 1658 . -mat_plapack_npcols <n> - number of columns in processor partition 1659 . -mat_plapack_nb <n> - block size of template vector 1660 . -mat_plapack_nb_alg <n> - algorithmic block size 1661 - -mat_plapack_ckerror <n> - error checking flag 1662 1663 Level: intermediate 1664 1665 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1666 1667 M*/ 1668 1669 EXTERN_C_BEGIN 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatCreate_MPIDense" 1672 PetscErrorCode MatCreate_MPIDense(Mat mat) 1673 { 1674 Mat_MPIDense *a; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1679 mat->data = (void*)a; 1680 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1681 1682 mat->insertmode = NOT_SET_VALUES; 1683 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1684 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1685 1686 /* build cache for off array entries formed */ 1687 a->donotstash = PETSC_FALSE; 1688 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1689 1690 /* stuff used for matrix vector multiply */ 1691 a->lvec = 0; 1692 a->Mvctx = 0; 1693 a->roworiented = PETSC_TRUE; 1694 1695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1696 "MatGetDiagonalBlock_MPIDense", 1697 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1699 "MatMPIDenseSetPreallocation_MPIDense", 1700 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1701 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1702 "MatMatMult_MPIAIJ_MPIDense", 1703 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1704 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1705 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1706 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1707 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1708 "MatMatMultNumeric_MPIAIJ_MPIDense", 1709 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1710 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1711 "MatGetFactor_mpidense_petsc", 1712 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1713 #if defined(PETSC_HAVE_PLAPACK) 1714 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1715 "MatGetFactor_mpidense_plapack", 1716 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1717 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1718 #endif 1719 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1720 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 /*MC 1726 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1727 1728 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1729 and MATMPIDENSE otherwise. 1730 1731 Options Database Keys: 1732 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1733 1734 Level: beginner 1735 1736 1737 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1738 M*/ 1739 1740 EXTERN_C_BEGIN 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatCreate_Dense" 1743 PetscErrorCode MatCreate_Dense(Mat A) 1744 { 1745 PetscErrorCode ierr; 1746 PetscMPIInt size; 1747 1748 PetscFunctionBegin; 1749 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1750 if (size == 1) { 1751 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1752 } else { 1753 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 EXTERN_C_END 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1761 /*@C 1762 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1763 1764 Not collective 1765 1766 Input Parameters: 1767 . A - the matrix 1768 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1769 to control all matrix memory allocation. 1770 1771 Notes: 1772 The dense format is fully compatible with standard Fortran 77 1773 storage by columns. 1774 1775 The data input variable is intended primarily for Fortran programmers 1776 who wish to allocate their own matrix memory space. Most users should 1777 set data=PETSC_NULL. 1778 1779 Level: intermediate 1780 1781 .keywords: matrix,dense, parallel 1782 1783 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1784 @*/ 1785 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1786 { 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatCreateMPIDense" 1796 /*@C 1797 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1798 1799 Collective on MPI_Comm 1800 1801 Input Parameters: 1802 + comm - MPI communicator 1803 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1804 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1805 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1806 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1807 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1808 to control all matrix memory allocation. 1809 1810 Output Parameter: 1811 . A - the matrix 1812 1813 Notes: 1814 The dense format is fully compatible with standard Fortran 77 1815 storage by columns. 1816 1817 The data input variable is intended primarily for Fortran programmers 1818 who wish to allocate their own matrix memory space. Most users should 1819 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1820 1821 The user MUST specify either the local or global matrix dimensions 1822 (possibly both). 1823 1824 Level: intermediate 1825 1826 .keywords: matrix,dense, parallel 1827 1828 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1829 @*/ 1830 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1831 { 1832 PetscErrorCode ierr; 1833 PetscMPIInt size; 1834 1835 PetscFunctionBegin; 1836 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1837 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1838 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1839 if (size > 1) { 1840 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1841 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1842 } else { 1843 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1844 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1845 } 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatDuplicate_MPIDense" 1851 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1852 { 1853 Mat mat; 1854 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1855 PetscErrorCode ierr; 1856 1857 PetscFunctionBegin; 1858 *newmat = 0; 1859 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1860 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1861 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1862 a = (Mat_MPIDense*)mat->data; 1863 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1864 1865 mat->factortype = A->factortype; 1866 mat->assembled = PETSC_TRUE; 1867 mat->preallocated = PETSC_TRUE; 1868 1869 a->size = oldmat->size; 1870 a->rank = oldmat->rank; 1871 mat->insertmode = NOT_SET_VALUES; 1872 a->nvec = oldmat->nvec; 1873 a->donotstash = oldmat->donotstash; 1874 1875 ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr); 1876 ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr); 1877 1878 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1879 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1880 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1881 1882 *newmat = mat; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1888 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1889 { 1890 PetscErrorCode ierr; 1891 PetscMPIInt rank,size; 1892 PetscInt *rowners,i,m,nz,j; 1893 PetscScalar *array,*vals,*vals_ptr; 1894 MPI_Status status; 1895 1896 PetscFunctionBegin; 1897 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1898 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1899 1900 /* determine ownership of all rows */ 1901 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1902 else m = newmat->rmap->n; 1903 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1904 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1905 rowners[0] = 0; 1906 for (i=2; i<=size; i++) { 1907 rowners[i] += rowners[i-1]; 1908 } 1909 1910 if (!sizesset) { 1911 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1912 } 1913 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1914 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1915 1916 if (!rank) { 1917 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1918 1919 /* read in my part of the matrix numerical values */ 1920 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1921 1922 /* insert into matrix-by row (this is why cannot directly read into array */ 1923 vals_ptr = vals; 1924 for (i=0; i<m; i++) { 1925 for (j=0; j<N; j++) { 1926 array[i + j*m] = *vals_ptr++; 1927 } 1928 } 1929 1930 /* read in other processors and ship out */ 1931 for (i=1; i<size; i++) { 1932 nz = (rowners[i+1] - rowners[i])*N; 1933 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1934 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1935 } 1936 } else { 1937 /* receive numeric values */ 1938 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1939 1940 /* receive message of values*/ 1941 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 1942 1943 /* insert into matrix-by row (this is why cannot directly read into array */ 1944 vals_ptr = vals; 1945 for (i=0; i<m; i++) { 1946 for (j=0; j<N; j++) { 1947 array[i + j*m] = *vals_ptr++; 1948 } 1949 } 1950 } 1951 ierr = PetscFree(rowners);CHKERRQ(ierr); 1952 ierr = PetscFree(vals);CHKERRQ(ierr); 1953 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatLoad_MPIDense" 1960 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1961 { 1962 PetscScalar *vals,*svals; 1963 MPI_Comm comm = ((PetscObject)viewer)->comm; 1964 MPI_Status status; 1965 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1966 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1967 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1968 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1969 int fd; 1970 PetscErrorCode ierr; 1971 1972 PetscFunctionBegin; 1973 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1974 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1975 if (!rank) { 1976 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1977 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1978 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1979 } 1980 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1981 1982 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1983 M = header[1]; N = header[2]; nz = header[3]; 1984 1985 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1986 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1987 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1988 1989 /* If global sizes are set, check if they are consistent with that given in the file */ 1990 if (sizesset) { 1991 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1992 } 1993 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1994 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1995 1996 /* 1997 Handle case where matrix is stored on disk as a dense matrix 1998 */ 1999 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 2000 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 2001 PetscFunctionReturn(0); 2002 } 2003 2004 /* determine ownership of all rows */ 2005 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 2006 else m = PetscMPIIntCast(newmat->rmap->n); 2007 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2008 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2009 rowners[0] = 0; 2010 for (i=2; i<=size; i++) { 2011 rowners[i] += rowners[i-1]; 2012 } 2013 rstart = rowners[rank]; 2014 rend = rowners[rank+1]; 2015 2016 /* distribute row lengths to all processors */ 2017 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2018 if (!rank) { 2019 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2020 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2021 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2022 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2023 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2024 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2025 } else { 2026 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2027 } 2028 2029 if (!rank) { 2030 /* calculate the number of nonzeros on each processor */ 2031 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2032 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2033 for (i=0; i<size; i++) { 2034 for (j=rowners[i]; j< rowners[i+1]; j++) { 2035 procsnz[i] += rowlengths[j]; 2036 } 2037 } 2038 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2039 2040 /* determine max buffer needed and allocate it */ 2041 maxnz = 0; 2042 for (i=0; i<size; i++) { 2043 maxnz = PetscMax(maxnz,procsnz[i]); 2044 } 2045 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2046 2047 /* read in my part of the matrix column indices */ 2048 nz = procsnz[0]; 2049 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2050 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2051 2052 /* read in every one elses and ship off */ 2053 for (i=1; i<size; i++) { 2054 nz = procsnz[i]; 2055 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2056 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2057 } 2058 ierr = PetscFree(cols);CHKERRQ(ierr); 2059 } else { 2060 /* determine buffer space needed for message */ 2061 nz = 0; 2062 for (i=0; i<m; i++) { 2063 nz += ourlens[i]; 2064 } 2065 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2066 2067 /* receive message of column indices*/ 2068 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2069 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2070 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2071 } 2072 2073 /* loop over local rows, determining number of off diagonal entries */ 2074 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2075 jj = 0; 2076 for (i=0; i<m; i++) { 2077 for (j=0; j<ourlens[i]; j++) { 2078 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2079 jj++; 2080 } 2081 } 2082 2083 /* create our matrix */ 2084 for (i=0; i<m; i++) { 2085 ourlens[i] -= offlens[i]; 2086 } 2087 2088 if (!sizesset) { 2089 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2090 } 2091 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2092 for (i=0; i<m; i++) { 2093 ourlens[i] += offlens[i]; 2094 } 2095 2096 if (!rank) { 2097 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2098 2099 /* read in my part of the matrix numerical values */ 2100 nz = procsnz[0]; 2101 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2102 2103 /* insert into matrix */ 2104 jj = rstart; 2105 smycols = mycols; 2106 svals = vals; 2107 for (i=0; i<m; i++) { 2108 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2109 smycols += ourlens[i]; 2110 svals += ourlens[i]; 2111 jj++; 2112 } 2113 2114 /* read in other processors and ship out */ 2115 for (i=1; i<size; i++) { 2116 nz = procsnz[i]; 2117 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2118 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2119 } 2120 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2121 } else { 2122 /* receive numeric values */ 2123 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2124 2125 /* receive message of values*/ 2126 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2127 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2128 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2129 2130 /* insert into matrix */ 2131 jj = rstart; 2132 smycols = mycols; 2133 svals = vals; 2134 for (i=0; i<m; i++) { 2135 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2136 smycols += ourlens[i]; 2137 svals += ourlens[i]; 2138 jj++; 2139 } 2140 } 2141 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2142 ierr = PetscFree(vals);CHKERRQ(ierr); 2143 ierr = PetscFree(mycols);CHKERRQ(ierr); 2144 ierr = PetscFree(rowners);CHKERRQ(ierr); 2145 2146 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2147 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatEqual_MPIDense" 2153 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2154 { 2155 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2156 Mat a,b; 2157 PetscBool flg; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 a = matA->A; 2162 b = matB->A; 2163 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2164 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #if defined(PETSC_HAVE_PLAPACK) 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2172 /*@C 2173 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2174 Level: developer 2175 2176 .keywords: Petsc, destroy, package, PLAPACK 2177 .seealso: PetscFinalize() 2178 @*/ 2179 PetscErrorCode PetscPLAPACKFinalizePackage(void) 2180 { 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 ierr = PLA_Finalize();CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2190 /*@C 2191 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2192 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2193 2194 Input Parameter: 2195 . comm - the communicator the matrix lives on 2196 2197 Level: developer 2198 2199 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2200 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2201 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2202 cannot overlap. 2203 2204 .keywords: Petsc, initialize, package, PLAPACK 2205 .seealso: PetscSysInitializePackage(), PetscInitialize() 2206 @*/ 2207 PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm) 2208 { 2209 PetscMPIInt size; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 if (!PLA_Initialized(PETSC_NULL)) { 2214 2215 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2216 Plapack_nprows = 1; 2217 Plapack_npcols = size; 2218 2219 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2220 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2222 #if defined(PETSC_USE_DEBUG) 2223 Plapack_ierror = 3; 2224 #else 2225 Plapack_ierror = 0; 2226 #endif 2227 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2228 if (Plapack_ierror){ 2229 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2230 } else { 2231 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2232 } 2233 2234 Plapack_nb_alg = 0; 2235 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2236 if (Plapack_nb_alg) { 2237 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2238 } 2239 PetscOptionsEnd(); 2240 2241 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2242 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2243 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2244 } 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #endif 2249