1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscBool pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscBool flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscBool roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 211 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 212 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 213 214 /* No parallel redistribution currently supported! Should really check each index set 215 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 216 original matrix! */ 217 218 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 219 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 220 221 /* Check submatrix call */ 222 if (scall == MAT_REUSE_MATRIX) { 223 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 224 /* Really need to test rows and column sizes! */ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = addv; /* in case this processor had no cache */ 279 280 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 281 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 282 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 289 { 290 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 291 PetscErrorCode ierr; 292 PetscInt i,*row,*col,flg,j,rstart,ncols; 293 PetscMPIInt n; 294 PetscScalar *val; 295 InsertMode addv=mat->insertmode; 296 297 PetscFunctionBegin; 298 /* wait on receives */ 299 while (1) { 300 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscBool found; 358 const PetscScalar *xx; 359 PetscScalar *bb; 360 361 PetscFunctionBegin; 362 /* first count number of contributors to each processor */ 363 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 364 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 365 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 366 for (i=0; i<N; i++) { 367 idx = rows[i]; 368 found = PETSC_FALSE; 369 for (j=0; j<size; j++) { 370 if (idx >= owners[j] && idx < owners[j+1]) { 371 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 372 } 373 } 374 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 375 } 376 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 377 378 /* inform other processors of number of messages and max length*/ 379 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 380 381 /* post receives: */ 382 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 383 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 384 for (i=0; i<nrecvs; i++) { 385 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 386 } 387 388 /* do sends: 389 1) starts[i] gives the starting index in svalues for stuff going to 390 the ith processor 391 */ 392 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 393 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 394 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 395 starts[0] = 0; 396 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 397 for (i=0; i<N; i++) { 398 svalues[starts[owner[i]]++] = rows[i]; 399 } 400 401 starts[0] = 0; 402 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 403 count = 0; 404 for (i=0; i<size; i++) { 405 if (nprocs[2*i+1]) { 406 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 407 } 408 } 409 ierr = PetscFree(starts);CHKERRQ(ierr); 410 411 base = owners[rank]; 412 413 /* wait on receives */ 414 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 415 count = nrecvs; 416 slen = 0; 417 while (count) { 418 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 419 /* unpack receives into our local space */ 420 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 421 source[imdex] = recv_status.MPI_SOURCE; 422 lens[imdex] = n; 423 slen += n; 424 count--; 425 } 426 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 427 428 /* move the data into the send scatter */ 429 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 430 count = 0; 431 for (i=0; i<nrecvs; i++) { 432 values = rvalues + i*nmax; 433 for (j=0; j<lens[i]; j++) { 434 lrows[count++] = values[j] - base; 435 } 436 } 437 ierr = PetscFree(rvalues);CHKERRQ(ierr); 438 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 439 ierr = PetscFree(owner);CHKERRQ(ierr); 440 ierr = PetscFree(nprocs);CHKERRQ(ierr); 441 442 /* fix right hand side if needed */ 443 if (x && b) { 444 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 445 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 446 for (i=0; i<slen; i++) { 447 bb[lrows[i]] = diag*xx[lrows[i]]; 448 } 449 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 450 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 451 } 452 453 /* actually zap the local rows */ 454 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 455 ierr = PetscFree(lrows);CHKERRQ(ierr); 456 457 /* wait on sends */ 458 if (nsends) { 459 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 460 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 461 ierr = PetscFree(send_status);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(send_waits);CHKERRQ(ierr); 464 ierr = PetscFree(svalues);CHKERRQ(ierr); 465 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMult_MPIDense" 471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 472 { 473 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatMultAdd_MPIDense" 485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 488 PetscErrorCode ierr; 489 490 PetscFunctionBegin; 491 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatMultTranspose_MPIDense" 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 PetscErrorCode ierr; 503 PetscScalar zero = 0.0; 504 505 PetscFunctionBegin; 506 ierr = VecSet(yy,zero);CHKERRQ(ierr); 507 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 508 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 516 { 517 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 518 PetscErrorCode ierr; 519 520 PetscFunctionBegin; 521 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 522 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 523 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatGetDiagonal_MPIDense" 530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 534 PetscErrorCode ierr; 535 PetscInt len,i,n,m = A->rmap->n,radd; 536 PetscScalar *x,zero = 0.0; 537 538 PetscFunctionBegin; 539 ierr = VecSet(v,zero);CHKERRQ(ierr); 540 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 541 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 542 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 543 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 544 radd = A->rmap->rstart*m; 545 for (i=0; i<len; i++) { 546 x[i] = aloc->v[radd + i*m + i]; 547 } 548 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatDestroy_MPIDense" 554 PetscErrorCode MatDestroy_MPIDense(Mat mat) 555 { 556 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 557 PetscErrorCode ierr; 558 #if defined(PETSC_HAVE_PLAPACK) 559 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 560 #endif 561 562 PetscFunctionBegin; 563 564 #if defined(PETSC_USE_LOG) 565 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 566 #endif 567 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 568 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 569 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 570 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 571 #if defined(PETSC_HAVE_PLAPACK) 572 if (lu) { 573 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 574 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 575 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 576 577 if (lu->is_pla) { 578 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 579 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 580 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 581 } 582 } 583 #endif 584 585 ierr = PetscFree(mdn);CHKERRQ(ierr); 586 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatView_MPIDense_Binary" 597 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 598 { 599 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 600 PetscErrorCode ierr; 601 PetscViewerFormat format; 602 int fd; 603 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 604 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 605 PetscScalar *work,*v,*vv; 606 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 607 MPI_Status status; 608 609 PetscFunctionBegin; 610 if (mdn->size == 1) { 611 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 612 } else { 613 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 614 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 615 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 616 617 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 618 if (format == PETSC_VIEWER_NATIVE) { 619 620 if (!rank) { 621 /* store the matrix as a dense matrix */ 622 header[0] = MAT_FILE_CLASSID; 623 header[1] = mat->rmap->N; 624 header[2] = N; 625 header[3] = MATRIX_BINARY_FORMAT_DENSE; 626 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 627 628 /* get largest work array needed for transposing array */ 629 mmax = mat->rmap->n; 630 for (i=1; i<size; i++) { 631 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 632 } 633 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 634 635 /* write out local array, by rows */ 636 m = mat->rmap->n; 637 v = a->v; 638 for (j=0; j<N; j++) { 639 for (i=0; i<m; i++) { 640 work[j + i*N] = *v++; 641 } 642 } 643 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 644 /* get largest work array to receive messages from other processes, excludes process zero */ 645 mmax = 0; 646 for (i=1; i<size; i++) { 647 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 648 } 649 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 650 for(k = 1; k < size; k++) { 651 v = vv; 652 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 653 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 654 655 for(j = 0; j < N; j++) { 656 for(i = 0; i < m; i++) { 657 work[j + i*N] = *v++; 658 } 659 } 660 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 661 } 662 ierr = PetscFree(work);CHKERRQ(ierr); 663 ierr = PetscFree(vv);CHKERRQ(ierr); 664 } else { 665 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 666 } 667 } else { 668 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 669 } 670 } 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 676 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 677 { 678 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 679 PetscErrorCode ierr; 680 PetscMPIInt size = mdn->size,rank = mdn->rank; 681 const PetscViewerType vtype; 682 PetscBool iascii,isdraw; 683 PetscViewer sviewer; 684 PetscViewerFormat format; 685 #if defined(PETSC_HAVE_PLAPACK) 686 Mat_Plapack *lu; 687 #endif 688 689 PetscFunctionBegin; 690 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 691 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 692 if (iascii) { 693 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 694 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 695 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 696 MatInfo info; 697 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 698 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 699 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 701 #if defined(PETSC_HAVE_PLAPACK) 702 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 703 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 704 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 705 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 706 if (mat->factortype){ 707 lu=(Mat_Plapack*)(mat->spptr); 708 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 709 } 710 #else 711 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 712 #endif 713 PetscFunctionReturn(0); 714 } else if (format == PETSC_VIEWER_ASCII_INFO) { 715 PetscFunctionReturn(0); 716 } 717 } else if (isdraw) { 718 PetscDraw draw; 719 PetscBool isnull; 720 721 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 722 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 723 if (isnull) PetscFunctionReturn(0); 724 } 725 726 if (size == 1) { 727 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 728 } else { 729 /* assemble the entire matrix onto first processor. */ 730 Mat A; 731 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 732 PetscInt *cols; 733 PetscScalar *vals; 734 735 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 736 if (!rank) { 737 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 738 } else { 739 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 740 } 741 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 742 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 743 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 744 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 745 746 /* Copy the matrix ... This isn't the most efficient means, 747 but it's quick for now */ 748 A->insertmode = INSERT_VALUES; 749 row = mat->rmap->rstart; m = mdn->A->rmap->n; 750 for (i=0; i<m; i++) { 751 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 752 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 753 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 754 row++; 755 } 756 757 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 758 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 759 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 760 if (!rank) { 761 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 762 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 763 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 764 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 765 } 766 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 767 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 768 ierr = MatDestroy(A);CHKERRQ(ierr); 769 } 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MPIDense" 775 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 776 { 777 PetscErrorCode ierr; 778 PetscBool iascii,isbinary,isdraw,issocket; 779 780 PetscFunctionBegin; 781 782 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 783 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 786 787 if (iascii || issocket || isdraw) { 788 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 789 } else if (isbinary) { 790 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 791 } else { 792 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 793 } 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatGetInfo_MPIDense" 799 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 800 { 801 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 802 Mat mdn = mat->A; 803 PetscErrorCode ierr; 804 PetscReal isend[5],irecv[5]; 805 806 PetscFunctionBegin; 807 info->block_size = 1.0; 808 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 809 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 810 isend[3] = info->memory; isend[4] = info->mallocs; 811 if (flag == MAT_LOCAL) { 812 info->nz_used = isend[0]; 813 info->nz_allocated = isend[1]; 814 info->nz_unneeded = isend[2]; 815 info->memory = isend[3]; 816 info->mallocs = isend[4]; 817 } else if (flag == MAT_GLOBAL_MAX) { 818 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 819 info->nz_used = irecv[0]; 820 info->nz_allocated = irecv[1]; 821 info->nz_unneeded = irecv[2]; 822 info->memory = irecv[3]; 823 info->mallocs = irecv[4]; 824 } else if (flag == MAT_GLOBAL_SUM) { 825 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 826 info->nz_used = irecv[0]; 827 info->nz_allocated = irecv[1]; 828 info->nz_unneeded = irecv[2]; 829 info->memory = irecv[3]; 830 info->mallocs = irecv[4]; 831 } 832 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 833 info->fill_ratio_needed = 0; 834 info->factor_mallocs = 0; 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatSetOption_MPIDense" 840 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 841 { 842 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 843 PetscErrorCode ierr; 844 845 PetscFunctionBegin; 846 switch (op) { 847 case MAT_NEW_NONZERO_LOCATIONS: 848 case MAT_NEW_NONZERO_LOCATION_ERR: 849 case MAT_NEW_NONZERO_ALLOCATION_ERR: 850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 851 break; 852 case MAT_ROW_ORIENTED: 853 a->roworiented = flg; 854 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 855 break; 856 case MAT_NEW_DIAGONALS: 857 case MAT_USE_HASH_TABLE: 858 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 859 break; 860 case MAT_IGNORE_OFF_PROC_ENTRIES: 861 a->donotstash = flg; 862 break; 863 case MAT_SYMMETRIC: 864 case MAT_STRUCTURALLY_SYMMETRIC: 865 case MAT_HERMITIAN: 866 case MAT_SYMMETRY_ETERNAL: 867 case MAT_IGNORE_LOWER_TRIANGULAR: 868 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 869 break; 870 default: 871 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatDiagonalScale_MPIDense" 879 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 880 { 881 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 882 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 883 PetscScalar *l,*r,x,*v; 884 PetscErrorCode ierr; 885 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 886 887 PetscFunctionBegin; 888 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 889 if (ll) { 890 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 891 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 892 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 893 for (i=0; i<m; i++) { 894 x = l[i]; 895 v = mat->v + i; 896 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 897 } 898 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 899 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 900 } 901 if (rr) { 902 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 903 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 904 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 907 for (i=0; i<n; i++) { 908 x = r[i]; 909 v = mat->v + i*m; 910 for (j=0; j<m; j++) { (*v++) *= x;} 911 } 912 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 913 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 914 } 915 PetscFunctionReturn(0); 916 } 917 918 #undef __FUNCT__ 919 #define __FUNCT__ "MatNorm_MPIDense" 920 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 921 { 922 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 923 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 924 PetscErrorCode ierr; 925 PetscInt i,j; 926 PetscReal sum = 0.0; 927 PetscScalar *v = mat->v; 928 929 PetscFunctionBegin; 930 if (mdn->size == 1) { 931 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 932 } else { 933 if (type == NORM_FROBENIUS) { 934 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 935 #if defined(PETSC_USE_COMPLEX) 936 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 937 #else 938 sum += (*v)*(*v); v++; 939 #endif 940 } 941 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 942 *nrm = sqrt(*nrm); 943 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 944 } else if (type == NORM_1) { 945 PetscReal *tmp,*tmp2; 946 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 947 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 948 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 949 *nrm = 0.0; 950 v = mat->v; 951 for (j=0; j<mdn->A->cmap->n; j++) { 952 for (i=0; i<mdn->A->rmap->n; i++) { 953 tmp[j] += PetscAbsScalar(*v); v++; 954 } 955 } 956 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 957 for (j=0; j<A->cmap->N; j++) { 958 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 959 } 960 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 961 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 962 } else if (type == NORM_INFINITY) { /* max row norm */ 963 PetscReal ntemp; 964 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 965 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 966 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 967 } 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatTranspose_MPIDense" 973 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 974 { 975 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 976 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 977 Mat B; 978 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 979 PetscErrorCode ierr; 980 PetscInt j,i; 981 PetscScalar *v; 982 983 PetscFunctionBegin; 984 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 985 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 986 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 987 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 988 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 989 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 990 } else { 991 B = *matout; 992 } 993 994 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 995 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 996 for (i=0; i<m; i++) rwork[i] = rstart + i; 997 for (j=0; j<n; j++) { 998 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 999 v += m; 1000 } 1001 ierr = PetscFree(rwork);CHKERRQ(ierr); 1002 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1005 *matout = B; 1006 } else { 1007 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1014 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1018 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1019 { 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #if defined(PETSC_HAVE_PLAPACK) 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1031 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1032 { 1033 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1034 PetscErrorCode ierr; 1035 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1036 PetscScalar *array; 1037 PetscReal one = 1.0; 1038 1039 PetscFunctionBegin; 1040 /* Copy A into F->lu->A */ 1041 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1042 ierr = PLA_API_begin();CHKERRQ(ierr); 1043 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1044 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1045 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1046 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1047 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1048 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1049 ierr = PLA_API_end();CHKERRQ(ierr); 1050 lu->rstart = rstart; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1056 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1057 { 1058 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1059 PetscErrorCode ierr; 1060 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1061 PetscScalar *array; 1062 PetscReal one = 1.0; 1063 1064 PetscFunctionBegin; 1065 /* Copy F into A->lu->A */ 1066 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1067 ierr = PLA_API_begin();CHKERRQ(ierr); 1068 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1069 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1070 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1071 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1072 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1073 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1074 ierr = PLA_API_end();CHKERRQ(ierr); 1075 lu->rstart = rstart; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1081 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1082 { 1083 PetscErrorCode ierr; 1084 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1085 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1086 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1087 PLA_Obj alpha = NULL,beta = NULL; 1088 1089 PetscFunctionBegin; 1090 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1091 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1092 1093 /* 1094 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1095 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1096 */ 1097 1098 /* do the multiply in PLA */ 1099 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1100 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1101 CHKMEMQ; 1102 1103 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1104 CHKMEMQ; 1105 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1106 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1107 1108 /* 1109 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1110 */ 1111 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1117 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1118 { 1119 PetscErrorCode ierr; 1120 PetscInt m=A->rmap->n,n=B->cmap->n; 1121 Mat Cmat; 1122 1123 PetscFunctionBegin; 1124 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1125 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1126 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1127 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1128 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1129 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1130 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1131 1132 *C = Cmat; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1138 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 if (scall == MAT_INITIAL_MATRIX){ 1144 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1145 } 1146 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatSolve_MPIDense" 1152 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1153 { 1154 MPI_Comm comm = ((PetscObject)A)->comm; 1155 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1156 PetscErrorCode ierr; 1157 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1158 PetscScalar *array; 1159 PetscReal one = 1.0; 1160 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1161 PLA_Obj v_pla = NULL; 1162 PetscScalar *loc_buf; 1163 Vec loc_x; 1164 1165 PetscFunctionBegin; 1166 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1167 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1168 1169 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1170 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1171 PLA_Obj_set_to_zero(v_pla); 1172 1173 /* Copy b into rhs_pla */ 1174 PLA_API_begin(); 1175 PLA_Obj_API_open(v_pla); 1176 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1177 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1178 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1179 PLA_Obj_API_close(v_pla); 1180 PLA_API_end(); 1181 1182 if (A->factortype == MAT_FACTOR_LU){ 1183 /* Apply the permutations to the right hand sides */ 1184 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1185 1186 /* Solve L y = b, overwriting b with y */ 1187 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1188 1189 /* Solve U x = y (=b), overwriting b with x */ 1190 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1191 } else { /* MAT_FACTOR_CHOLESKY */ 1192 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1193 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1194 PLA_NONUNIT_DIAG,lu->A,v_pla); 1195 } 1196 1197 /* Copy PLAPACK x into Petsc vector x */ 1198 PLA_Obj_local_length(v_pla, &loc_m); 1199 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1200 PLA_Obj_local_stride(v_pla, &loc_stride); 1201 /* 1202 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1203 */ 1204 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1205 if (!lu->pla_solved){ 1206 1207 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1208 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1209 1210 /* Create IS and cts for VecScatterring */ 1211 PLA_Obj_local_length(v_pla, &loc_m); 1212 PLA_Obj_local_stride(v_pla, &loc_stride); 1213 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1214 1215 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1216 for (i=0; i<loc_m; i+=lu->nb){ 1217 j = 0; 1218 while (j < lu->nb && i+j < loc_m){ 1219 idx_petsc[i+j] = rstart + j; j++; 1220 } 1221 rstart += size*lu->nb; 1222 } 1223 1224 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1225 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr); 1227 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr); 1228 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1229 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1230 } 1231 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 1234 /* Free data */ 1235 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1236 PLA_Obj_free(&v_pla); 1237 1238 lu->pla_solved = PETSC_TRUE; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1244 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1245 { 1246 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1247 PetscErrorCode ierr; 1248 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1249 PetscInt info_pla=0; 1250 PetscScalar *array,one = 1.0; 1251 1252 PetscFunctionBegin; 1253 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1254 PLA_Obj_free(&lu->A); 1255 PLA_Obj_free (&lu->pivots); 1256 } 1257 /* Create PLAPACK matrix object */ 1258 lu->A = NULL; lu->pivots = NULL; 1259 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1260 PLA_Obj_set_to_zero(lu->A); 1261 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1262 1263 /* Copy A into lu->A */ 1264 PLA_API_begin(); 1265 PLA_Obj_API_open(lu->A); 1266 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1267 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1268 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1269 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1270 PLA_Obj_API_close(lu->A); 1271 PLA_API_end(); 1272 1273 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1274 info_pla = PLA_LU(lu->A,lu->pivots); 1275 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1276 1277 lu->rstart = rstart; 1278 lu->mstruct = SAME_NONZERO_PATTERN; 1279 F->ops->solve = MatSolve_MPIDense; 1280 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1286 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1287 { 1288 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1289 PetscErrorCode ierr; 1290 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1291 PetscInt info_pla=0; 1292 PetscScalar *array,one = 1.0; 1293 1294 PetscFunctionBegin; 1295 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1296 PLA_Obj_free(&lu->A); 1297 } 1298 /* Create PLAPACK matrix object */ 1299 lu->A = NULL; 1300 lu->pivots = NULL; 1301 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1302 1303 /* Copy A into lu->A */ 1304 PLA_API_begin(); 1305 PLA_Obj_API_open(lu->A); 1306 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1307 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1308 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1309 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1310 PLA_Obj_API_close(lu->A); 1311 PLA_API_end(); 1312 1313 /* Factor P A -> Chol */ 1314 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1315 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1316 1317 lu->rstart = rstart; 1318 lu->mstruct = SAME_NONZERO_PATTERN; 1319 F->ops->solve = MatSolve_MPIDense; 1320 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1321 PetscFunctionReturn(0); 1322 } 1323 1324 /* Note the Petsc perm permutation is ignored */ 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1327 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1328 { 1329 PetscErrorCode ierr; 1330 PetscBool issymmetric,set; 1331 1332 PetscFunctionBegin; 1333 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1334 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1335 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 /* Note the Petsc r and c permutations are ignored */ 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1342 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1343 { 1344 PetscErrorCode ierr; 1345 PetscInt M = A->rmap->N; 1346 Mat_Plapack *lu; 1347 1348 PetscFunctionBegin; 1349 lu = (Mat_Plapack*)F->spptr; 1350 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1351 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1352 PetscFunctionReturn(0); 1353 } 1354 1355 EXTERN_C_BEGIN 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1358 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1359 { 1360 PetscFunctionBegin; 1361 *type = MATSOLVERPLAPACK; 1362 PetscFunctionReturn(0); 1363 } 1364 EXTERN_C_END 1365 1366 EXTERN_C_BEGIN 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1369 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1370 { 1371 PetscErrorCode ierr; 1372 Mat_Plapack *lu; 1373 PetscMPIInt size; 1374 PetscInt M=A->rmap->N; 1375 1376 PetscFunctionBegin; 1377 /* Create the factorization matrix */ 1378 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1379 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1380 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1381 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1382 (*F)->spptr = (void*)lu; 1383 1384 /* Set default Plapack parameters */ 1385 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1386 lu->nb = M/size; 1387 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1388 1389 /* Set runtime options */ 1390 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1391 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1392 PetscOptionsEnd(); 1393 1394 /* Create object distribution template */ 1395 lu->templ = NULL; 1396 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1397 1398 /* Set the datatype */ 1399 #if defined(PETSC_USE_COMPLEX) 1400 lu->datatype = MPI_DOUBLE_COMPLEX; 1401 #else 1402 lu->datatype = MPI_DOUBLE; 1403 #endif 1404 1405 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1406 1407 1408 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1409 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1410 1411 if (ftype == MAT_FACTOR_LU) { 1412 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1413 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1414 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1415 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1416 (*F)->factortype = ftype; 1417 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 EXTERN_C_END 1421 #endif 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1425 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1426 { 1427 #if defined(PETSC_HAVE_PLAPACK) 1428 PetscErrorCode ierr; 1429 #endif 1430 1431 PetscFunctionBegin; 1432 #if defined(PETSC_HAVE_PLAPACK) 1433 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1434 #else 1435 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1436 #endif 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatAXPY_MPIDense" 1442 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1443 { 1444 PetscErrorCode ierr; 1445 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1446 1447 PetscFunctionBegin; 1448 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatConjugate_MPIDense" 1454 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1455 { 1456 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1457 PetscErrorCode ierr; 1458 1459 PetscFunctionBegin; 1460 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatRealPart_MPIDense" 1466 PetscErrorCode MatRealPart_MPIDense(Mat A) 1467 { 1468 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1469 PetscErrorCode ierr; 1470 1471 PetscFunctionBegin; 1472 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1478 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1479 { 1480 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1481 PetscErrorCode ierr; 1482 1483 PetscFunctionBegin; 1484 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 /* -------------------------------------------------------------------*/ 1489 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1490 MatGetRow_MPIDense, 1491 MatRestoreRow_MPIDense, 1492 MatMult_MPIDense, 1493 /* 4*/ MatMultAdd_MPIDense, 1494 MatMultTranspose_MPIDense, 1495 MatMultTransposeAdd_MPIDense, 1496 0, 1497 0, 1498 0, 1499 /*10*/ 0, 1500 0, 1501 0, 1502 0, 1503 MatTranspose_MPIDense, 1504 /*15*/ MatGetInfo_MPIDense, 1505 MatEqual_MPIDense, 1506 MatGetDiagonal_MPIDense, 1507 MatDiagonalScale_MPIDense, 1508 MatNorm_MPIDense, 1509 /*20*/ MatAssemblyBegin_MPIDense, 1510 MatAssemblyEnd_MPIDense, 1511 MatSetOption_MPIDense, 1512 MatZeroEntries_MPIDense, 1513 /*24*/ MatZeroRows_MPIDense, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /*29*/ MatSetUpPreallocation_MPIDense, 1519 0, 1520 0, 1521 MatGetArray_MPIDense, 1522 MatRestoreArray_MPIDense, 1523 /*34*/ MatDuplicate_MPIDense, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /*39*/ MatAXPY_MPIDense, 1529 MatGetSubMatrices_MPIDense, 1530 0, 1531 MatGetValues_MPIDense, 1532 0, 1533 /*44*/ 0, 1534 MatScale_MPIDense, 1535 0, 1536 0, 1537 0, 1538 /*49*/ 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 /*54*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /*59*/ MatGetSubMatrix_MPIDense, 1549 MatDestroy_MPIDense, 1550 MatView_MPIDense, 1551 0, 1552 0, 1553 /*64*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*69*/ 0, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*74*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*79*/ 0, 1569 0, 1570 0, 1571 0, 1572 /*83*/ MatLoad_MPIDense, 1573 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*89*/ 1579 #if defined(PETSC_HAVE_PLAPACK) 1580 MatMatMult_MPIDense_MPIDense, 1581 MatMatMultSymbolic_MPIDense_MPIDense, 1582 MatMatMultNumeric_MPIDense_MPIDense, 1583 #else 1584 0, 1585 0, 1586 0, 1587 #endif 1588 0, 1589 0, 1590 /*94*/ 0, 1591 0, 1592 0, 1593 0, 1594 0, 1595 /*99*/ 0, 1596 0, 1597 0, 1598 MatConjugate_MPIDense, 1599 0, 1600 /*104*/0, 1601 MatRealPart_MPIDense, 1602 MatImaginaryPart_MPIDense, 1603 0, 1604 0, 1605 /*109*/0, 1606 0, 1607 0, 1608 0, 1609 0, 1610 /*114*/0, 1611 0, 1612 0, 1613 0, 1614 0, 1615 /*119*/0, 1616 0, 1617 0, 1618 0 1619 }; 1620 1621 EXTERN_C_BEGIN 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1624 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1625 { 1626 Mat_MPIDense *a; 1627 PetscErrorCode ierr; 1628 1629 PetscFunctionBegin; 1630 mat->preallocated = PETSC_TRUE; 1631 /* Note: For now, when data is specified above, this assumes the user correctly 1632 allocates the local dense storage space. We should add error checking. */ 1633 1634 a = (Mat_MPIDense*)mat->data; 1635 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1636 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1637 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1638 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1639 a->nvec = mat->cmap->n; 1640 1641 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1642 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1643 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1644 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1645 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1646 PetscFunctionReturn(0); 1647 } 1648 EXTERN_C_END 1649 1650 /*MC 1651 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1652 1653 run ./configure with the option --download-plapack 1654 1655 1656 Options Database Keys: 1657 . -mat_plapack_nprows <n> - number of rows in processor partition 1658 . -mat_plapack_npcols <n> - number of columns in processor partition 1659 . -mat_plapack_nb <n> - block size of template vector 1660 . -mat_plapack_nb_alg <n> - algorithmic block size 1661 - -mat_plapack_ckerror <n> - error checking flag 1662 1663 Level: intermediate 1664 1665 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1666 1667 M*/ 1668 1669 EXTERN_C_BEGIN 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatCreate_MPIDense" 1672 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1673 { 1674 Mat_MPIDense *a; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1679 mat->data = (void*)a; 1680 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1681 1682 mat->insertmode = NOT_SET_VALUES; 1683 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1684 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1685 1686 /* build cache for off array entries formed */ 1687 a->donotstash = PETSC_FALSE; 1688 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1689 1690 /* stuff used for matrix vector multiply */ 1691 a->lvec = 0; 1692 a->Mvctx = 0; 1693 a->roworiented = PETSC_TRUE; 1694 1695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1696 "MatGetDiagonalBlock_MPIDense", 1697 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1699 "MatMPIDenseSetPreallocation_MPIDense", 1700 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1701 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1702 "MatMatMult_MPIAIJ_MPIDense", 1703 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1704 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1705 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1706 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1707 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1708 "MatMatMultNumeric_MPIAIJ_MPIDense", 1709 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1710 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1711 "MatGetFactor_mpidense_petsc", 1712 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1713 #if defined(PETSC_HAVE_PLAPACK) 1714 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1715 "MatGetFactor_mpidense_plapack", 1716 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1717 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1718 #endif 1719 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1720 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 /*MC 1726 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1727 1728 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1729 and MATMPIDENSE otherwise. 1730 1731 Options Database Keys: 1732 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1733 1734 Level: beginner 1735 1736 1737 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1738 M*/ 1739 1740 EXTERN_C_BEGIN 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatCreate_Dense" 1743 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1744 { 1745 PetscErrorCode ierr; 1746 PetscMPIInt size; 1747 1748 PetscFunctionBegin; 1749 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1750 if (size == 1) { 1751 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1752 } else { 1753 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 EXTERN_C_END 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1761 /*@C 1762 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1763 1764 Not collective 1765 1766 Input Parameters: 1767 . A - the matrix 1768 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1769 to control all matrix memory allocation. 1770 1771 Notes: 1772 The dense format is fully compatible with standard Fortran 77 1773 storage by columns. 1774 1775 The data input variable is intended primarily for Fortran programmers 1776 who wish to allocate their own matrix memory space. Most users should 1777 set data=PETSC_NULL. 1778 1779 Level: intermediate 1780 1781 .keywords: matrix,dense, parallel 1782 1783 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1784 @*/ 1785 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1786 { 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatCreateMPIDense" 1796 /*@C 1797 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1798 1799 Collective on MPI_Comm 1800 1801 Input Parameters: 1802 + comm - MPI communicator 1803 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1804 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1805 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1806 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1807 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1808 to control all matrix memory allocation. 1809 1810 Output Parameter: 1811 . A - the matrix 1812 1813 Notes: 1814 The dense format is fully compatible with standard Fortran 77 1815 storage by columns. 1816 1817 The data input variable is intended primarily for Fortran programmers 1818 who wish to allocate their own matrix memory space. Most users should 1819 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1820 1821 The user MUST specify either the local or global matrix dimensions 1822 (possibly both). 1823 1824 Level: intermediate 1825 1826 .keywords: matrix,dense, parallel 1827 1828 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1829 @*/ 1830 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1831 { 1832 PetscErrorCode ierr; 1833 PetscMPIInt size; 1834 1835 PetscFunctionBegin; 1836 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1837 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1838 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1839 if (size > 1) { 1840 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1841 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1842 } else { 1843 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1844 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1845 } 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatDuplicate_MPIDense" 1851 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1852 { 1853 Mat mat; 1854 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1855 PetscErrorCode ierr; 1856 1857 PetscFunctionBegin; 1858 *newmat = 0; 1859 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1860 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1861 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1862 a = (Mat_MPIDense*)mat->data; 1863 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1864 1865 mat->factortype = A->factortype; 1866 mat->assembled = PETSC_TRUE; 1867 mat->preallocated = PETSC_TRUE; 1868 1869 a->size = oldmat->size; 1870 a->rank = oldmat->rank; 1871 mat->insertmode = NOT_SET_VALUES; 1872 a->nvec = oldmat->nvec; 1873 a->donotstash = oldmat->donotstash; 1874 1875 ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr); 1876 ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr); 1877 1878 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1879 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1880 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1881 1882 *newmat = mat; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1888 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1889 { 1890 PetscErrorCode ierr; 1891 PetscMPIInt rank,size; 1892 PetscInt *rowners,i,m,nz,j; 1893 PetscScalar *array,*vals,*vals_ptr; 1894 MPI_Status status; 1895 1896 PetscFunctionBegin; 1897 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1898 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1899 1900 /* determine ownership of all rows */ 1901 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1902 else m = newmat->rmap->n; 1903 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1904 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1905 rowners[0] = 0; 1906 for (i=2; i<=size; i++) { 1907 rowners[i] += rowners[i-1]; 1908 } 1909 1910 if (!sizesset) { 1911 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1912 } 1913 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1914 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1915 1916 if (!rank) { 1917 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1918 1919 /* read in my part of the matrix numerical values */ 1920 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1921 1922 /* insert into matrix-by row (this is why cannot directly read into array */ 1923 vals_ptr = vals; 1924 for (i=0; i<m; i++) { 1925 for (j=0; j<N; j++) { 1926 array[i + j*m] = *vals_ptr++; 1927 } 1928 } 1929 1930 /* read in other processors and ship out */ 1931 for (i=1; i<size; i++) { 1932 nz = (rowners[i+1] - rowners[i])*N; 1933 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1934 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1935 } 1936 } else { 1937 /* receive numeric values */ 1938 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1939 1940 /* receive message of values*/ 1941 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 1942 1943 /* insert into matrix-by row (this is why cannot directly read into array */ 1944 vals_ptr = vals; 1945 for (i=0; i<m; i++) { 1946 for (j=0; j<N; j++) { 1947 array[i + j*m] = *vals_ptr++; 1948 } 1949 } 1950 } 1951 ierr = PetscFree(rowners);CHKERRQ(ierr); 1952 ierr = PetscFree(vals);CHKERRQ(ierr); 1953 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatLoad_MPIDense" 1960 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1961 { 1962 PetscScalar *vals,*svals; 1963 MPI_Comm comm = ((PetscObject)viewer)->comm; 1964 MPI_Status status; 1965 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1966 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1967 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1968 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1969 int fd; 1970 PetscErrorCode ierr; 1971 1972 PetscFunctionBegin; 1973 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1974 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1975 if (!rank) { 1976 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1977 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1978 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1979 } 1980 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1981 1982 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1983 M = header[1]; N = header[2]; nz = header[3]; 1984 1985 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1986 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1987 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1988 1989 /* If global sizes are set, check if they are consistent with that given in the file */ 1990 if (sizesset) { 1991 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1992 } 1993 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1994 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1995 1996 /* 1997 Handle case where matrix is stored on disk as a dense matrix 1998 */ 1999 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 2000 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 2001 PetscFunctionReturn(0); 2002 } 2003 2004 /* determine ownership of all rows */ 2005 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 2006 else m = PetscMPIIntCast(newmat->rmap->n); 2007 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2008 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2009 rowners[0] = 0; 2010 for (i=2; i<=size; i++) { 2011 rowners[i] += rowners[i-1]; 2012 } 2013 rstart = rowners[rank]; 2014 rend = rowners[rank+1]; 2015 2016 /* distribute row lengths to all processors */ 2017 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2018 if (!rank) { 2019 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2020 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2021 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2022 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2023 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2024 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2025 } else { 2026 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2027 } 2028 2029 if (!rank) { 2030 /* calculate the number of nonzeros on each processor */ 2031 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2032 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2033 for (i=0; i<size; i++) { 2034 for (j=rowners[i]; j< rowners[i+1]; j++) { 2035 procsnz[i] += rowlengths[j]; 2036 } 2037 } 2038 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2039 2040 /* determine max buffer needed and allocate it */ 2041 maxnz = 0; 2042 for (i=0; i<size; i++) { 2043 maxnz = PetscMax(maxnz,procsnz[i]); 2044 } 2045 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2046 2047 /* read in my part of the matrix column indices */ 2048 nz = procsnz[0]; 2049 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2050 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2051 2052 /* read in every one elses and ship off */ 2053 for (i=1; i<size; i++) { 2054 nz = procsnz[i]; 2055 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2056 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2057 } 2058 ierr = PetscFree(cols);CHKERRQ(ierr); 2059 } else { 2060 /* determine buffer space needed for message */ 2061 nz = 0; 2062 for (i=0; i<m; i++) { 2063 nz += ourlens[i]; 2064 } 2065 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2066 2067 /* receive message of column indices*/ 2068 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2069 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2070 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2071 } 2072 2073 /* loop over local rows, determining number of off diagonal entries */ 2074 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2075 jj = 0; 2076 for (i=0; i<m; i++) { 2077 for (j=0; j<ourlens[i]; j++) { 2078 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2079 jj++; 2080 } 2081 } 2082 2083 /* create our matrix */ 2084 for (i=0; i<m; i++) { 2085 ourlens[i] -= offlens[i]; 2086 } 2087 2088 if (!sizesset) { 2089 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2090 } 2091 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2092 for (i=0; i<m; i++) { 2093 ourlens[i] += offlens[i]; 2094 } 2095 2096 if (!rank) { 2097 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2098 2099 /* read in my part of the matrix numerical values */ 2100 nz = procsnz[0]; 2101 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2102 2103 /* insert into matrix */ 2104 jj = rstart; 2105 smycols = mycols; 2106 svals = vals; 2107 for (i=0; i<m; i++) { 2108 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2109 smycols += ourlens[i]; 2110 svals += ourlens[i]; 2111 jj++; 2112 } 2113 2114 /* read in other processors and ship out */ 2115 for (i=1; i<size; i++) { 2116 nz = procsnz[i]; 2117 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2118 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2119 } 2120 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2121 } else { 2122 /* receive numeric values */ 2123 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2124 2125 /* receive message of values*/ 2126 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2127 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2128 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2129 2130 /* insert into matrix */ 2131 jj = rstart; 2132 smycols = mycols; 2133 svals = vals; 2134 for (i=0; i<m; i++) { 2135 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2136 smycols += ourlens[i]; 2137 svals += ourlens[i]; 2138 jj++; 2139 } 2140 } 2141 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2142 ierr = PetscFree(vals);CHKERRQ(ierr); 2143 ierr = PetscFree(mycols);CHKERRQ(ierr); 2144 ierr = PetscFree(rowners);CHKERRQ(ierr); 2145 2146 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2147 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatEqual_MPIDense" 2153 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2154 { 2155 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2156 Mat a,b; 2157 PetscBool flg; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 a = matA->A; 2162 b = matB->A; 2163 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2164 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #if defined(PETSC_HAVE_PLAPACK) 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2172 /*@C 2173 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2174 Level: developer 2175 2176 .keywords: Petsc, destroy, package, PLAPACK 2177 .seealso: PetscFinalize() 2178 @*/ 2179 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2180 { 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 ierr = PLA_Finalize();CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2190 /*@C 2191 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2192 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2193 2194 Input Parameter: 2195 . comm - the communicator the matrix lives on 2196 2197 Level: developer 2198 2199 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2200 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2201 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2202 cannot overlap. 2203 2204 .keywords: Petsc, initialize, package, PLAPACK 2205 .seealso: PetscSysInitializePackage(), PetscInitialize() 2206 @*/ 2207 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2208 { 2209 PetscMPIInt size; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 if (!PLA_Initialized(PETSC_NULL)) { 2214 2215 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2216 Plapack_nprows = 1; 2217 Plapack_npcols = size; 2218 2219 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2220 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2222 #if defined(PETSC_USE_DEBUG) 2223 Plapack_ierror = 3; 2224 #else 2225 Plapack_ierror = 0; 2226 #endif 2227 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2228 if (Plapack_ierror){ 2229 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2230 } else { 2231 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2232 } 2233 2234 Plapack_nb_alg = 0; 2235 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2236 if (Plapack_nb_alg) { 2237 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2238 } 2239 PetscOptionsEnd(); 2240 2241 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2242 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2243 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2244 } 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #endif 2249