1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscBool pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscBool flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscBool roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 211 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 212 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 213 214 /* No parallel redistribution currently supported! Should really check each index set 215 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 216 original matrix! */ 217 218 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 219 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 220 221 /* Check submatrix call */ 222 if (scall == MAT_REUSE_MATRIX) { 223 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 224 /* Really need to test rows and column sizes! */ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = addv; /* in case this processor had no cache */ 279 280 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 281 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 282 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 289 { 290 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 291 PetscErrorCode ierr; 292 PetscInt i,*row,*col,flg,j,rstart,ncols; 293 PetscMPIInt n; 294 PetscScalar *val; 295 InsertMode addv=mat->insertmode; 296 297 PetscFunctionBegin; 298 /* wait on receives */ 299 while (1) { 300 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscBool found; 358 const PetscScalar *xx; 359 PetscScalar *bb; 360 361 PetscFunctionBegin; 362 /* first count number of contributors to each processor */ 363 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 364 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 365 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 366 for (i=0; i<N; i++) { 367 idx = rows[i]; 368 found = PETSC_FALSE; 369 for (j=0; j<size; j++) { 370 if (idx >= owners[j] && idx < owners[j+1]) { 371 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 372 } 373 } 374 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 375 } 376 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 377 378 /* inform other processors of number of messages and max length*/ 379 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 380 381 /* post receives: */ 382 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 383 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 384 for (i=0; i<nrecvs; i++) { 385 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 386 } 387 388 /* do sends: 389 1) starts[i] gives the starting index in svalues for stuff going to 390 the ith processor 391 */ 392 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 393 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 394 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 395 starts[0] = 0; 396 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 397 for (i=0; i<N; i++) { 398 svalues[starts[owner[i]]++] = rows[i]; 399 } 400 401 starts[0] = 0; 402 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 403 count = 0; 404 for (i=0; i<size; i++) { 405 if (nprocs[2*i+1]) { 406 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 407 } 408 } 409 ierr = PetscFree(starts);CHKERRQ(ierr); 410 411 base = owners[rank]; 412 413 /* wait on receives */ 414 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 415 count = nrecvs; 416 slen = 0; 417 while (count) { 418 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 419 /* unpack receives into our local space */ 420 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 421 source[imdex] = recv_status.MPI_SOURCE; 422 lens[imdex] = n; 423 slen += n; 424 count--; 425 } 426 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 427 428 /* move the data into the send scatter */ 429 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 430 count = 0; 431 for (i=0; i<nrecvs; i++) { 432 values = rvalues + i*nmax; 433 for (j=0; j<lens[i]; j++) { 434 lrows[count++] = values[j] - base; 435 } 436 } 437 ierr = PetscFree(rvalues);CHKERRQ(ierr); 438 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 439 ierr = PetscFree(owner);CHKERRQ(ierr); 440 ierr = PetscFree(nprocs);CHKERRQ(ierr); 441 442 /* fix right hand side if needed */ 443 if (x && b) { 444 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 445 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 446 for (i=0; i<slen; i++) { 447 bb[lrows[i]] = diag*xx[lrows[i]]; 448 } 449 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 450 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 451 } 452 453 /* actually zap the local rows */ 454 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 455 ierr = PetscFree(lrows);CHKERRQ(ierr); 456 457 /* wait on sends */ 458 if (nsends) { 459 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 460 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 461 ierr = PetscFree(send_status);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(send_waits);CHKERRQ(ierr); 464 ierr = PetscFree(svalues);CHKERRQ(ierr); 465 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMult_MPIDense" 471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 472 { 473 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatMultAdd_MPIDense" 485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 488 PetscErrorCode ierr; 489 490 PetscFunctionBegin; 491 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatMultTranspose_MPIDense" 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 PetscErrorCode ierr; 503 PetscScalar zero = 0.0; 504 505 PetscFunctionBegin; 506 ierr = VecSet(yy,zero);CHKERRQ(ierr); 507 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 508 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 516 { 517 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 518 PetscErrorCode ierr; 519 520 PetscFunctionBegin; 521 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 522 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 523 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatGetDiagonal_MPIDense" 530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 534 PetscErrorCode ierr; 535 PetscInt len,i,n,m = A->rmap->n,radd; 536 PetscScalar *x,zero = 0.0; 537 538 PetscFunctionBegin; 539 ierr = VecSet(v,zero);CHKERRQ(ierr); 540 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 541 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 542 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 543 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 544 radd = A->rmap->rstart*m; 545 for (i=0; i<len; i++) { 546 x[i] = aloc->v[radd + i*m + i]; 547 } 548 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatDestroy_MPIDense" 554 PetscErrorCode MatDestroy_MPIDense(Mat mat) 555 { 556 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 557 PetscErrorCode ierr; 558 #if defined(PETSC_HAVE_PLAPACK) 559 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 560 #endif 561 562 PetscFunctionBegin; 563 564 #if defined(PETSC_USE_LOG) 565 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 566 #endif 567 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 568 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 569 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 570 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 571 #if defined(PETSC_HAVE_PLAPACK) 572 if (lu) { 573 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 574 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 575 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 576 577 if (lu->is_pla) { 578 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 579 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 580 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 581 } 582 } 583 #endif 584 585 ierr = PetscFree(mdn);CHKERRQ(ierr); 586 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatView_MPIDense_Binary" 597 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 598 { 599 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 600 PetscErrorCode ierr; 601 PetscViewerFormat format; 602 int fd; 603 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 604 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 605 PetscScalar *work,*v,*vv; 606 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 607 MPI_Status status; 608 609 PetscFunctionBegin; 610 if (mdn->size == 1) { 611 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 612 } else { 613 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 614 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 615 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 616 617 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 618 if (format == PETSC_VIEWER_NATIVE) { 619 620 if (!rank) { 621 /* store the matrix as a dense matrix */ 622 header[0] = MAT_FILE_CLASSID; 623 header[1] = mat->rmap->N; 624 header[2] = N; 625 header[3] = MATRIX_BINARY_FORMAT_DENSE; 626 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 627 628 /* get largest work array needed for transposing array */ 629 mmax = mat->rmap->n; 630 for (i=1; i<size; i++) { 631 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 632 } 633 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 634 635 /* write out local array, by rows */ 636 m = mat->rmap->n; 637 v = a->v; 638 for (j=0; j<N; j++) { 639 for (i=0; i<m; i++) { 640 work[j + i*N] = *v++; 641 } 642 } 643 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 644 /* get largest work array to receive messages from other processes, excludes process zero */ 645 mmax = 0; 646 for (i=1; i<size; i++) { 647 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 648 } 649 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 650 for(k = 1; k < size; k++) { 651 v = vv; 652 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 653 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 654 655 for(j = 0; j < N; j++) { 656 for(i = 0; i < m; i++) { 657 work[j + i*N] = *v++; 658 } 659 } 660 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 661 } 662 ierr = PetscFree(work);CHKERRQ(ierr); 663 ierr = PetscFree(vv);CHKERRQ(ierr); 664 } else { 665 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 666 } 667 } else { 668 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 669 } 670 } 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 676 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 677 { 678 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 679 PetscErrorCode ierr; 680 PetscMPIInt size = mdn->size,rank = mdn->rank; 681 const PetscViewerType vtype; 682 PetscBool iascii,isdraw; 683 PetscViewer sviewer; 684 PetscViewerFormat format; 685 #if defined(PETSC_HAVE_PLAPACK) 686 Mat_Plapack *lu; 687 #endif 688 689 PetscFunctionBegin; 690 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 691 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 692 if (iascii) { 693 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 694 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 695 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 696 MatInfo info; 697 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 698 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 699 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 #if defined(PETSC_HAVE_PLAPACK) 702 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 703 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 704 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 705 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 706 if (mat->factortype){ 707 lu=(Mat_Plapack*)(mat->spptr); 708 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 709 } 710 #else 711 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 712 #endif 713 PetscFunctionReturn(0); 714 } else if (format == PETSC_VIEWER_ASCII_INFO) { 715 PetscFunctionReturn(0); 716 } 717 } else if (isdraw) { 718 PetscDraw draw; 719 PetscBool isnull; 720 721 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 722 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 723 if (isnull) PetscFunctionReturn(0); 724 } 725 726 if (size == 1) { 727 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 728 } else { 729 /* assemble the entire matrix onto first processor. */ 730 Mat A; 731 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 732 PetscInt *cols; 733 PetscScalar *vals; 734 735 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 736 if (!rank) { 737 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 738 } else { 739 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 740 } 741 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 742 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 743 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 744 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 745 746 /* Copy the matrix ... This isn't the most efficient means, 747 but it's quick for now */ 748 A->insertmode = INSERT_VALUES; 749 row = mat->rmap->rstart; m = mdn->A->rmap->n; 750 for (i=0; i<m; i++) { 751 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 752 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 753 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 754 row++; 755 } 756 757 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 758 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 759 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 760 if (!rank) { 761 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 762 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 763 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 764 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 765 } 766 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 767 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 768 ierr = MatDestroy(A);CHKERRQ(ierr); 769 } 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MPIDense" 775 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 776 { 777 PetscErrorCode ierr; 778 PetscBool iascii,isbinary,isdraw,issocket; 779 780 PetscFunctionBegin; 781 782 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 783 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 786 787 if (iascii || issocket || isdraw) { 788 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 789 } else if (isbinary) { 790 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 791 } else { 792 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 793 } 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatGetInfo_MPIDense" 799 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 800 { 801 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 802 Mat mdn = mat->A; 803 PetscErrorCode ierr; 804 PetscReal isend[5],irecv[5]; 805 806 PetscFunctionBegin; 807 info->block_size = 1.0; 808 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 809 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 810 isend[3] = info->memory; isend[4] = info->mallocs; 811 if (flag == MAT_LOCAL) { 812 info->nz_used = isend[0]; 813 info->nz_allocated = isend[1]; 814 info->nz_unneeded = isend[2]; 815 info->memory = isend[3]; 816 info->mallocs = isend[4]; 817 } else if (flag == MAT_GLOBAL_MAX) { 818 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 819 info->nz_used = irecv[0]; 820 info->nz_allocated = irecv[1]; 821 info->nz_unneeded = irecv[2]; 822 info->memory = irecv[3]; 823 info->mallocs = irecv[4]; 824 } else if (flag == MAT_GLOBAL_SUM) { 825 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 826 info->nz_used = irecv[0]; 827 info->nz_allocated = irecv[1]; 828 info->nz_unneeded = irecv[2]; 829 info->memory = irecv[3]; 830 info->mallocs = irecv[4]; 831 } 832 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 833 info->fill_ratio_needed = 0; 834 info->factor_mallocs = 0; 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatSetOption_MPIDense" 840 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 841 { 842 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 843 PetscErrorCode ierr; 844 845 PetscFunctionBegin; 846 switch (op) { 847 case MAT_NEW_NONZERO_LOCATIONS: 848 case MAT_NEW_NONZERO_LOCATION_ERR: 849 case MAT_NEW_NONZERO_ALLOCATION_ERR: 850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 851 break; 852 case MAT_ROW_ORIENTED: 853 a->roworiented = flg; 854 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 855 break; 856 case MAT_NEW_DIAGONALS: 857 case MAT_USE_HASH_TABLE: 858 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 859 break; 860 case MAT_IGNORE_OFF_PROC_ENTRIES: 861 a->donotstash = flg; 862 break; 863 case MAT_SYMMETRIC: 864 case MAT_STRUCTURALLY_SYMMETRIC: 865 case MAT_HERMITIAN: 866 case MAT_SYMMETRY_ETERNAL: 867 case MAT_IGNORE_LOWER_TRIANGULAR: 868 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 869 break; 870 default: 871 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatDiagonalScale_MPIDense" 879 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 880 { 881 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 882 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 883 PetscScalar *l,*r,x,*v; 884 PetscErrorCode ierr; 885 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 886 887 PetscFunctionBegin; 888 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 889 if (ll) { 890 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 891 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 892 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 893 for (i=0; i<m; i++) { 894 x = l[i]; 895 v = mat->v + i; 896 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 897 } 898 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 899 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 900 } 901 if (rr) { 902 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 903 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 904 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 907 for (i=0; i<n; i++) { 908 x = r[i]; 909 v = mat->v + i*m; 910 for (j=0; j<m; j++) { (*v++) *= x;} 911 } 912 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 913 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 914 } 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatNorm_MPIDense" 920 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 921 { 922 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 923 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 924 PetscErrorCode ierr; 925 PetscInt i,j; 926 PetscReal sum = 0.0; 927 PetscScalar *v = mat->v; 928 929 PetscFunctionBegin; 930 if (mdn->size == 1) { 931 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 932 } else { 933 if (type == NORM_FROBENIUS) { 934 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 935 #if defined(PETSC_USE_COMPLEX) 936 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 937 #else 938 sum += (*v)*(*v); v++; 939 #endif 940 } 941 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 942 *nrm = sqrt(*nrm); 943 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 944 } else if (type == NORM_1) { 945 PetscReal *tmp,*tmp2; 946 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 947 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 948 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 949 *nrm = 0.0; 950 v = mat->v; 951 for (j=0; j<mdn->A->cmap->n; j++) { 952 for (i=0; i<mdn->A->rmap->n; i++) { 953 tmp[j] += PetscAbsScalar(*v); v++; 954 } 955 } 956 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 957 for (j=0; j<A->cmap->N; j++) { 958 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 959 } 960 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 961 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 962 } else if (type == NORM_INFINITY) { /* max row norm */ 963 PetscReal ntemp; 964 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 965 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 966 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatTranspose_MPIDense" 973 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 974 { 975 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 976 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 977 Mat B; 978 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 979 PetscErrorCode ierr; 980 PetscInt j,i; 981 PetscScalar *v; 982 983 PetscFunctionBegin; 984 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 985 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 986 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 987 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 988 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 989 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 990 } else { 991 B = *matout; 992 } 993 994 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 995 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 996 for (i=0; i<m; i++) rwork[i] = rstart + i; 997 for (j=0; j<n; j++) { 998 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 999 v += m; 1000 } 1001 ierr = PetscFree(rwork);CHKERRQ(ierr); 1002 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1005 *matout = B; 1006 } else { 1007 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1014 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1018 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1019 { 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #if defined(PETSC_HAVE_PLAPACK) 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1031 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1032 { 1033 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1034 PetscErrorCode ierr; 1035 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1036 PetscScalar *array; 1037 PetscReal one = 1.0; 1038 1039 PetscFunctionBegin; 1040 /* Copy A into F->lu->A */ 1041 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1042 ierr = PLA_API_begin();CHKERRQ(ierr); 1043 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1044 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1045 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1046 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1047 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1048 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1049 ierr = PLA_API_end();CHKERRQ(ierr); 1050 lu->rstart = rstart; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1056 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1057 { 1058 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1059 PetscErrorCode ierr; 1060 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1061 PetscScalar *array; 1062 PetscReal one = 1.0; 1063 1064 PetscFunctionBegin; 1065 /* Copy F into A->lu->A */ 1066 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1067 ierr = PLA_API_begin();CHKERRQ(ierr); 1068 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1069 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1070 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1071 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1072 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1073 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1074 ierr = PLA_API_end();CHKERRQ(ierr); 1075 lu->rstart = rstart; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1081 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1082 { 1083 PetscErrorCode ierr; 1084 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1085 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1086 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1087 PLA_Obj alpha = NULL,beta = NULL; 1088 1089 PetscFunctionBegin; 1090 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1091 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1092 1093 /* 1094 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1095 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1096 */ 1097 1098 /* do the multiply in PLA */ 1099 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1100 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1101 CHKMEMQ; 1102 1103 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1104 CHKMEMQ; 1105 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1106 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1107 1108 /* 1109 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1110 */ 1111 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1117 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1118 { 1119 PetscErrorCode ierr; 1120 PetscInt m=A->rmap->n,n=B->cmap->n; 1121 Mat Cmat; 1122 1123 PetscFunctionBegin; 1124 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1125 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1126 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1127 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1128 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1129 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1130 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1131 1132 *C = Cmat; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1138 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 if (scall == MAT_INITIAL_MATRIX){ 1144 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1145 } 1146 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatSolve_MPIDense" 1152 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1153 { 1154 MPI_Comm comm = ((PetscObject)A)->comm; 1155 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1156 PetscErrorCode ierr; 1157 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1158 PetscScalar *array; 1159 PetscReal one = 1.0; 1160 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1161 PLA_Obj v_pla = NULL; 1162 PetscScalar *loc_buf; 1163 Vec loc_x; 1164 1165 PetscFunctionBegin; 1166 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1167 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1168 1169 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1170 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1171 PLA_Obj_set_to_zero(v_pla); 1172 1173 /* Copy b into rhs_pla */ 1174 PLA_API_begin(); 1175 PLA_Obj_API_open(v_pla); 1176 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1177 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1178 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1179 PLA_Obj_API_close(v_pla); 1180 PLA_API_end(); 1181 1182 if (A->factortype == MAT_FACTOR_LU){ 1183 /* Apply the permutations to the right hand sides */ 1184 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1185 1186 /* Solve L y = b, overwriting b with y */ 1187 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1188 1189 /* Solve U x = y (=b), overwriting b with x */ 1190 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1191 } else { /* MAT_FACTOR_CHOLESKY */ 1192 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1193 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1194 PLA_NONUNIT_DIAG,lu->A,v_pla); 1195 } 1196 1197 /* Copy PLAPACK x into Petsc vector x */ 1198 PLA_Obj_local_length(v_pla, &loc_m); 1199 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1200 PLA_Obj_local_stride(v_pla, &loc_stride); 1201 /* 1202 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1203 */ 1204 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1205 if (!lu->pla_solved){ 1206 1207 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1208 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1209 1210 /* Create IS and cts for VecScatterring */ 1211 PLA_Obj_local_length(v_pla, &loc_m); 1212 PLA_Obj_local_stride(v_pla, &loc_stride); 1213 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1214 1215 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1216 for (i=0; i<loc_m; i+=lu->nb){ 1217 j = 0; 1218 while (j < lu->nb && i+j < loc_m){ 1219 idx_petsc[i+j] = rstart + j; j++; 1220 } 1221 rstart += size*lu->nb; 1222 } 1223 1224 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1225 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr); 1227 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr); 1228 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1229 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1230 } 1231 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 1234 /* Free data */ 1235 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1236 PLA_Obj_free(&v_pla); 1237 1238 lu->pla_solved = PETSC_TRUE; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1244 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1245 { 1246 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1247 PetscErrorCode ierr; 1248 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1249 PetscInt info_pla=0; 1250 PetscScalar *array,one = 1.0; 1251 1252 PetscFunctionBegin; 1253 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1254 PLA_Obj_free(&lu->A); 1255 PLA_Obj_free (&lu->pivots); 1256 } 1257 /* Create PLAPACK matrix object */ 1258 lu->A = NULL; lu->pivots = NULL; 1259 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1260 PLA_Obj_set_to_zero(lu->A); 1261 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1262 1263 /* Copy A into lu->A */ 1264 PLA_API_begin(); 1265 PLA_Obj_API_open(lu->A); 1266 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1267 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1268 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1269 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1270 PLA_Obj_API_close(lu->A); 1271 PLA_API_end(); 1272 1273 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1274 info_pla = PLA_LU(lu->A,lu->pivots); 1275 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1276 1277 lu->rstart = rstart; 1278 lu->mstruct = SAME_NONZERO_PATTERN; 1279 F->ops->solve = MatSolve_MPIDense; 1280 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1286 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1287 { 1288 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1289 PetscErrorCode ierr; 1290 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1291 PetscInt info_pla=0; 1292 PetscScalar *array,one = 1.0; 1293 1294 PetscFunctionBegin; 1295 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1296 PLA_Obj_free(&lu->A); 1297 } 1298 /* Create PLAPACK matrix object */ 1299 lu->A = NULL; 1300 lu->pivots = NULL; 1301 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1302 1303 /* Copy A into lu->A */ 1304 PLA_API_begin(); 1305 PLA_Obj_API_open(lu->A); 1306 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1307 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1308 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1309 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1310 PLA_Obj_API_close(lu->A); 1311 PLA_API_end(); 1312 1313 /* Factor P A -> Chol */ 1314 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1315 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1316 1317 lu->rstart = rstart; 1318 lu->mstruct = SAME_NONZERO_PATTERN; 1319 F->ops->solve = MatSolve_MPIDense; 1320 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1321 PetscFunctionReturn(0); 1322 } 1323 1324 /* Note the Petsc perm permutation is ignored */ 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1327 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1328 { 1329 PetscErrorCode ierr; 1330 PetscBool issymmetric,set; 1331 1332 PetscFunctionBegin; 1333 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1334 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1335 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 /* Note the Petsc r and c permutations are ignored */ 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1342 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1343 { 1344 PetscErrorCode ierr; 1345 PetscInt M = A->rmap->N; 1346 Mat_Plapack *lu; 1347 1348 PetscFunctionBegin; 1349 lu = (Mat_Plapack*)F->spptr; 1350 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1351 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1352 PetscFunctionReturn(0); 1353 } 1354 1355 EXTERN_C_BEGIN 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1358 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1359 { 1360 PetscFunctionBegin; 1361 *type = MATSOLVERPLAPACK; 1362 PetscFunctionReturn(0); 1363 } 1364 EXTERN_C_END 1365 1366 EXTERN_C_BEGIN 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1369 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1370 { 1371 PetscErrorCode ierr; 1372 Mat_Plapack *lu; 1373 PetscMPIInt size; 1374 PetscInt M=A->rmap->N; 1375 1376 PetscFunctionBegin; 1377 /* Create the factorization matrix */ 1378 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1379 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1380 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1381 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1382 (*F)->spptr = (void*)lu; 1383 1384 /* Set default Plapack parameters */ 1385 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1386 lu->nb = M/size; 1387 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1388 1389 /* Set runtime options */ 1390 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1391 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1392 PetscOptionsEnd(); 1393 1394 /* Create object distribution template */ 1395 lu->templ = NULL; 1396 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1397 1398 /* Set the datatype */ 1399 #if defined(PETSC_USE_COMPLEX) 1400 lu->datatype = MPI_DOUBLE_COMPLEX; 1401 #else 1402 lu->datatype = MPI_DOUBLE; 1403 #endif 1404 1405 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1406 1407 1408 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1409 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1410 1411 if (ftype == MAT_FACTOR_LU) { 1412 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1413 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1414 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1415 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1416 (*F)->factortype = ftype; 1417 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 EXTERN_C_END 1421 #endif 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1425 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1426 { 1427 #if defined(PETSC_HAVE_PLAPACK) 1428 PetscErrorCode ierr; 1429 #endif 1430 1431 PetscFunctionBegin; 1432 #if defined(PETSC_HAVE_PLAPACK) 1433 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1434 #else 1435 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1436 #endif 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatAXPY_MPIDense" 1442 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1443 { 1444 PetscErrorCode ierr; 1445 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1446 1447 PetscFunctionBegin; 1448 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatConjugate_MPIDense" 1454 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1455 { 1456 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1457 PetscErrorCode ierr; 1458 1459 PetscFunctionBegin; 1460 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatRealPart_MPIDense" 1466 PetscErrorCode MatRealPart_MPIDense(Mat A) 1467 { 1468 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1469 PetscErrorCode ierr; 1470 1471 PetscFunctionBegin; 1472 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1478 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1479 { 1480 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1481 PetscErrorCode ierr; 1482 1483 PetscFunctionBegin; 1484 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 /* -------------------------------------------------------------------*/ 1489 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1490 MatGetRow_MPIDense, 1491 MatRestoreRow_MPIDense, 1492 MatMult_MPIDense, 1493 /* 4*/ MatMultAdd_MPIDense, 1494 MatMultTranspose_MPIDense, 1495 MatMultTransposeAdd_MPIDense, 1496 0, 1497 0, 1498 0, 1499 /*10*/ 0, 1500 0, 1501 0, 1502 0, 1503 MatTranspose_MPIDense, 1504 /*15*/ MatGetInfo_MPIDense, 1505 MatEqual_MPIDense, 1506 MatGetDiagonal_MPIDense, 1507 MatDiagonalScale_MPIDense, 1508 MatNorm_MPIDense, 1509 /*20*/ MatAssemblyBegin_MPIDense, 1510 MatAssemblyEnd_MPIDense, 1511 MatSetOption_MPIDense, 1512 MatZeroEntries_MPIDense, 1513 /*24*/ MatZeroRows_MPIDense, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /*29*/ MatSetUpPreallocation_MPIDense, 1519 0, 1520 0, 1521 MatGetArray_MPIDense, 1522 MatRestoreArray_MPIDense, 1523 /*34*/ MatDuplicate_MPIDense, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /*39*/ MatAXPY_MPIDense, 1529 MatGetSubMatrices_MPIDense, 1530 0, 1531 MatGetValues_MPIDense, 1532 0, 1533 /*44*/ 0, 1534 MatScale_MPIDense, 1535 0, 1536 0, 1537 0, 1538 /*49*/ 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 /*54*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /*59*/ MatGetSubMatrix_MPIDense, 1549 MatDestroy_MPIDense, 1550 MatView_MPIDense, 1551 0, 1552 0, 1553 /*64*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*69*/ 0, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*74*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*79*/ 0, 1569 0, 1570 0, 1571 0, 1572 /*83*/ MatLoad_MPIDense, 1573 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*89*/ 1579 #if defined(PETSC_HAVE_PLAPACK) 1580 MatMatMult_MPIDense_MPIDense, 1581 MatMatMultSymbolic_MPIDense_MPIDense, 1582 MatMatMultNumeric_MPIDense_MPIDense, 1583 #else 1584 0, 1585 0, 1586 0, 1587 #endif 1588 0, 1589 0, 1590 /*94*/ 0, 1591 0, 1592 0, 1593 0, 1594 0, 1595 /*99*/ 0, 1596 0, 1597 0, 1598 MatConjugate_MPIDense, 1599 0, 1600 /*104*/0, 1601 MatRealPart_MPIDense, 1602 MatImaginaryPart_MPIDense, 1603 0, 1604 0, 1605 /*109*/0, 1606 0, 1607 0, 1608 0, 1609 0, 1610 /*114*/0, 1611 0, 1612 0, 1613 0, 1614 0, 1615 /*119*/0, 1616 0, 1617 0, 1618 0 1619 }; 1620 1621 EXTERN_C_BEGIN 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1624 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1625 { 1626 Mat_MPIDense *a; 1627 PetscErrorCode ierr; 1628 1629 PetscFunctionBegin; 1630 mat->preallocated = PETSC_TRUE; 1631 /* Note: For now, when data is specified above, this assumes the user correctly 1632 allocates the local dense storage space. We should add error checking. */ 1633 1634 a = (Mat_MPIDense*)mat->data; 1635 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1636 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1637 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1638 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1639 a->nvec = mat->cmap->n; 1640 1641 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1642 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1643 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1644 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1645 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1646 PetscFunctionReturn(0); 1647 } 1648 EXTERN_C_END 1649 1650 /*MC 1651 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1652 1653 run ./configure with the option --download-plapack 1654 1655 1656 Options Database Keys: 1657 . -mat_plapack_nprows <n> - number of rows in processor partition 1658 . -mat_plapack_npcols <n> - number of columns in processor partition 1659 . -mat_plapack_nb <n> - block size of template vector 1660 . -mat_plapack_nb_alg <n> - algorithmic block size 1661 - -mat_plapack_ckerror <n> - error checking flag 1662 1663 Level: intermediate 1664 1665 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1666 1667 M*/ 1668 1669 EXTERN_C_BEGIN 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatCreate_MPIDense" 1672 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1673 { 1674 Mat_MPIDense *a; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1679 mat->data = (void*)a; 1680 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1681 mat->mapping = 0; 1682 1683 mat->insertmode = NOT_SET_VALUES; 1684 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1685 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1686 1687 /* build cache for off array entries formed */ 1688 a->donotstash = PETSC_FALSE; 1689 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1690 1691 /* stuff used for matrix vector multiply */ 1692 a->lvec = 0; 1693 a->Mvctx = 0; 1694 a->roworiented = PETSC_TRUE; 1695 1696 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1697 "MatGetDiagonalBlock_MPIDense", 1698 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1699 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1700 "MatMPIDenseSetPreallocation_MPIDense", 1701 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1703 "MatMatMult_MPIAIJ_MPIDense", 1704 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1705 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1706 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1707 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1708 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1709 "MatMatMultNumeric_MPIAIJ_MPIDense", 1710 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1711 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1712 "MatGetFactor_mpidense_petsc", 1713 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1714 #if defined(PETSC_HAVE_PLAPACK) 1715 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1716 "MatGetFactor_mpidense_plapack", 1717 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1718 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1719 #endif 1720 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1721 1722 PetscFunctionReturn(0); 1723 } 1724 EXTERN_C_END 1725 1726 /*MC 1727 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1728 1729 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1730 and MATMPIDENSE otherwise. 1731 1732 Options Database Keys: 1733 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1734 1735 Level: beginner 1736 1737 1738 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1739 M*/ 1740 1741 EXTERN_C_BEGIN 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatCreate_Dense" 1744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1745 { 1746 PetscErrorCode ierr; 1747 PetscMPIInt size; 1748 1749 PetscFunctionBegin; 1750 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1751 if (size == 1) { 1752 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1755 } 1756 PetscFunctionReturn(0); 1757 } 1758 EXTERN_C_END 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1762 /*@C 1763 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1764 1765 Not collective 1766 1767 Input Parameters: 1768 . A - the matrix 1769 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1770 to control all matrix memory allocation. 1771 1772 Notes: 1773 The dense format is fully compatible with standard Fortran 77 1774 storage by columns. 1775 1776 The data input variable is intended primarily for Fortran programmers 1777 who wish to allocate their own matrix memory space. Most users should 1778 set data=PETSC_NULL. 1779 1780 Level: intermediate 1781 1782 .keywords: matrix,dense, parallel 1783 1784 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1785 @*/ 1786 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1787 { 1788 PetscErrorCode ierr; 1789 1790 PetscFunctionBegin; 1791 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatCreateMPIDense" 1797 /*@C 1798 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1799 1800 Collective on MPI_Comm 1801 1802 Input Parameters: 1803 + comm - MPI communicator 1804 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1805 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1806 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1807 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1808 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1809 to control all matrix memory allocation. 1810 1811 Output Parameter: 1812 . A - the matrix 1813 1814 Notes: 1815 The dense format is fully compatible with standard Fortran 77 1816 storage by columns. 1817 1818 The data input variable is intended primarily for Fortran programmers 1819 who wish to allocate their own matrix memory space. Most users should 1820 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1821 1822 The user MUST specify either the local or global matrix dimensions 1823 (possibly both). 1824 1825 Level: intermediate 1826 1827 .keywords: matrix,dense, parallel 1828 1829 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1830 @*/ 1831 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1832 { 1833 PetscErrorCode ierr; 1834 PetscMPIInt size; 1835 1836 PetscFunctionBegin; 1837 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1838 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1839 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1840 if (size > 1) { 1841 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1842 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1843 } else { 1844 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1845 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatDuplicate_MPIDense" 1852 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1853 { 1854 Mat mat; 1855 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1856 PetscErrorCode ierr; 1857 1858 PetscFunctionBegin; 1859 *newmat = 0; 1860 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1861 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1862 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1863 a = (Mat_MPIDense*)mat->data; 1864 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1865 1866 mat->factortype = A->factortype; 1867 mat->assembled = PETSC_TRUE; 1868 mat->preallocated = PETSC_TRUE; 1869 1870 a->size = oldmat->size; 1871 a->rank = oldmat->rank; 1872 mat->insertmode = NOT_SET_VALUES; 1873 a->nvec = oldmat->nvec; 1874 a->donotstash = oldmat->donotstash; 1875 1876 ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr); 1877 ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr); 1878 1879 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1880 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1881 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1882 1883 *newmat = mat; 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1889 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1890 { 1891 PetscErrorCode ierr; 1892 PetscMPIInt rank,size; 1893 PetscInt *rowners,i,m,nz,j; 1894 PetscScalar *array,*vals,*vals_ptr; 1895 MPI_Status status; 1896 1897 PetscFunctionBegin; 1898 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1900 1901 /* determine ownership of all rows */ 1902 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1903 else m = newmat->rmap->n; 1904 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1905 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1906 rowners[0] = 0; 1907 for (i=2; i<=size; i++) { 1908 rowners[i] += rowners[i-1]; 1909 } 1910 1911 if (!sizesset) { 1912 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1913 } 1914 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1915 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1916 1917 if (!rank) { 1918 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1919 1920 /* read in my part of the matrix numerical values */ 1921 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1922 1923 /* insert into matrix-by row (this is why cannot directly read into array */ 1924 vals_ptr = vals; 1925 for (i=0; i<m; i++) { 1926 for (j=0; j<N; j++) { 1927 array[i + j*m] = *vals_ptr++; 1928 } 1929 } 1930 1931 /* read in other processors and ship out */ 1932 for (i=1; i<size; i++) { 1933 nz = (rowners[i+1] - rowners[i])*N; 1934 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1935 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1936 } 1937 } else { 1938 /* receive numeric values */ 1939 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1940 1941 /* receive message of values*/ 1942 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 1943 1944 /* insert into matrix-by row (this is why cannot directly read into array */ 1945 vals_ptr = vals; 1946 for (i=0; i<m; i++) { 1947 for (j=0; j<N; j++) { 1948 array[i + j*m] = *vals_ptr++; 1949 } 1950 } 1951 } 1952 ierr = PetscFree(rowners);CHKERRQ(ierr); 1953 ierr = PetscFree(vals);CHKERRQ(ierr); 1954 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatLoad_MPIDense" 1961 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1962 { 1963 PetscScalar *vals,*svals; 1964 MPI_Comm comm = ((PetscObject)viewer)->comm; 1965 MPI_Status status; 1966 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1967 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1968 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1969 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1970 int fd; 1971 PetscErrorCode ierr; 1972 1973 PetscFunctionBegin; 1974 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1975 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1976 if (!rank) { 1977 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1978 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1979 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1980 } 1981 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1982 1983 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1984 M = header[1]; N = header[2]; nz = header[3]; 1985 1986 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1987 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1988 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1989 1990 /* If global sizes are set, check if they are consistent with that given in the file */ 1991 if (sizesset) { 1992 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1993 } 1994 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1995 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1996 1997 /* 1998 Handle case where matrix is stored on disk as a dense matrix 1999 */ 2000 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 2001 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 2002 PetscFunctionReturn(0); 2003 } 2004 2005 /* determine ownership of all rows */ 2006 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 2007 else m = PetscMPIIntCast(newmat->rmap->n); 2008 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2009 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2010 rowners[0] = 0; 2011 for (i=2; i<=size; i++) { 2012 rowners[i] += rowners[i-1]; 2013 } 2014 rstart = rowners[rank]; 2015 rend = rowners[rank+1]; 2016 2017 /* distribute row lengths to all processors */ 2018 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2019 if (!rank) { 2020 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2021 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2022 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2023 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2024 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2025 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2026 } else { 2027 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2028 } 2029 2030 if (!rank) { 2031 /* calculate the number of nonzeros on each processor */ 2032 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2033 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2034 for (i=0; i<size; i++) { 2035 for (j=rowners[i]; j< rowners[i+1]; j++) { 2036 procsnz[i] += rowlengths[j]; 2037 } 2038 } 2039 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2040 2041 /* determine max buffer needed and allocate it */ 2042 maxnz = 0; 2043 for (i=0; i<size; i++) { 2044 maxnz = PetscMax(maxnz,procsnz[i]); 2045 } 2046 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2047 2048 /* read in my part of the matrix column indices */ 2049 nz = procsnz[0]; 2050 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2051 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2052 2053 /* read in every one elses and ship off */ 2054 for (i=1; i<size; i++) { 2055 nz = procsnz[i]; 2056 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2057 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2058 } 2059 ierr = PetscFree(cols);CHKERRQ(ierr); 2060 } else { 2061 /* determine buffer space needed for message */ 2062 nz = 0; 2063 for (i=0; i<m; i++) { 2064 nz += ourlens[i]; 2065 } 2066 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2067 2068 /* receive message of column indices*/ 2069 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2070 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2071 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2072 } 2073 2074 /* loop over local rows, determining number of off diagonal entries */ 2075 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2076 jj = 0; 2077 for (i=0; i<m; i++) { 2078 for (j=0; j<ourlens[i]; j++) { 2079 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2080 jj++; 2081 } 2082 } 2083 2084 /* create our matrix */ 2085 for (i=0; i<m; i++) { 2086 ourlens[i] -= offlens[i]; 2087 } 2088 2089 if (!sizesset) { 2090 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2091 } 2092 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2093 for (i=0; i<m; i++) { 2094 ourlens[i] += offlens[i]; 2095 } 2096 2097 if (!rank) { 2098 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2099 2100 /* read in my part of the matrix numerical values */ 2101 nz = procsnz[0]; 2102 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2103 2104 /* insert into matrix */ 2105 jj = rstart; 2106 smycols = mycols; 2107 svals = vals; 2108 for (i=0; i<m; i++) { 2109 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2110 smycols += ourlens[i]; 2111 svals += ourlens[i]; 2112 jj++; 2113 } 2114 2115 /* read in other processors and ship out */ 2116 for (i=1; i<size; i++) { 2117 nz = procsnz[i]; 2118 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2119 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2120 } 2121 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2122 } else { 2123 /* receive numeric values */ 2124 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2125 2126 /* receive message of values*/ 2127 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2128 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2129 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2130 2131 /* insert into matrix */ 2132 jj = rstart; 2133 smycols = mycols; 2134 svals = vals; 2135 for (i=0; i<m; i++) { 2136 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2137 smycols += ourlens[i]; 2138 svals += ourlens[i]; 2139 jj++; 2140 } 2141 } 2142 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2143 ierr = PetscFree(vals);CHKERRQ(ierr); 2144 ierr = PetscFree(mycols);CHKERRQ(ierr); 2145 ierr = PetscFree(rowners);CHKERRQ(ierr); 2146 2147 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatEqual_MPIDense" 2154 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2155 { 2156 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2157 Mat a,b; 2158 PetscBool flg; 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 a = matA->A; 2163 b = matB->A; 2164 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2165 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #if defined(PETSC_HAVE_PLAPACK) 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2173 /*@C 2174 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2175 Level: developer 2176 2177 .keywords: Petsc, destroy, package, PLAPACK 2178 .seealso: PetscFinalize() 2179 @*/ 2180 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2181 { 2182 PetscErrorCode ierr; 2183 2184 PetscFunctionBegin; 2185 ierr = PLA_Finalize();CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2191 /*@C 2192 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2193 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2194 2195 Input Parameter: 2196 . comm - the communicator the matrix lives on 2197 2198 Level: developer 2199 2200 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2201 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2202 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2203 cannot overlap. 2204 2205 .keywords: Petsc, initialize, package, PLAPACK 2206 .seealso: PetscSysInitializePackage(), PetscInitialize() 2207 @*/ 2208 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2209 { 2210 PetscMPIInt size; 2211 PetscErrorCode ierr; 2212 2213 PetscFunctionBegin; 2214 if (!PLA_Initialized(PETSC_NULL)) { 2215 2216 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2217 Plapack_nprows = 1; 2218 Plapack_npcols = size; 2219 2220 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2221 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2223 #if defined(PETSC_USE_DEBUG) 2224 Plapack_ierror = 3; 2225 #else 2226 Plapack_ierror = 0; 2227 #endif 2228 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2229 if (Plapack_ierror){ 2230 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2231 } else { 2232 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2233 } 2234 2235 Plapack_nb_alg = 0; 2236 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2237 if (Plapack_nb_alg) { 2238 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2239 } 2240 PetscOptionsEnd(); 2241 2242 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2243 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2244 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2245 } 2246 PetscFunctionReturn(0); 2247 } 2248 2249 #endif 2250