#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "../src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/
#if defined(PETSC_HAVE_PLAPACK)
/* Global PLAPACK configuration shared by all MPIDense factorizations in this file. */
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
EXTERN_C_BEGIN
#include "PLA.h"
EXTERN_C_END

/* Per-matrix PLAPACK factorization state, hung off Mat->spptr. */
typedef struct {
  PLA_Obj      A,pivots;        /* PLAPACK matrix and pivot objects */
  PLA_Template templ;           /* PLAPACK distribution template */
  MPI_Datatype datatype;
  PetscInt     nb,rstart;       /* distribution block size; first local row */
  VecScatter   ctx;             /* PETSc<->PLAPACK vector redistribution */
  IS           is_pla,is_petsc;
  PetscTruth   pla_solved;      /* has a solve been performed yet */
  MatStructure mstruct;
} Mat_Plapack;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

    MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;   /* MPI case: hand back the per-process sequential block */
  } else {
    *B = A;        /* already sequential */
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
/* Returns one locally-owned row; delegates to the inner SeqDense matrix after
   shifting the global row index to the local numbering. Only local rows allowed. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* MatGetRow_MPIDense hands out malloc'ed copies (via SeqDense), so free them here. */
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/* Builds a new sequential dense matrix aliasing the diagonal block of the local
   storage (columns rstart..rstart+m-1, offset m*rstart into the column-major array).
   Only square (global) matrices are supported. *iscopy = PETSC_TRUE: caller destroys *B. */
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  /* m x m block starting at local column rstart; lda of the local matrix is m */
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/* Inserts/adds values: locally-owned rows go straight into the SeqDense block;
   off-process rows are stashed for the assembly scatter (unless donotstash). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscTruth     roworiented = A->roworiented;

PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;   /* negative rows are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input starts at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-major input: element (i,j) sits at v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: queue in the stash; communicated during assembly */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
/* Retrieves values one entry at a time from the local SeqDense block.
   Only locally-owned rows may be queried (see SETERRQ in the else branch). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
/* Exposes the raw (column-major) storage of the local SeqDense block. */
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
/* Extracts the submatrix A(isrow,iscol). The column IS is gathered onto every
   process (iscol_local) since each process owns all columns of its local rows.
   Requires the submatrix row partitioning to match the original matrix. */
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes!
*/ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = 
addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/* Drains the stash: receives off-process entries, groups consecutive entries of
   the same row into a single MatSetValues_MPIDense call, then assembles the local
   block and (on first final assembly) builds the multiply scatter. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;   /* all messages processed */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
/* Zeros every entry by zeroing the local sequential block. */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly
unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
   NOTE(review): this comment appears copied from the MPIAIJ version; Mat_MPIDense
   has no ->B member -- confirm intent against the AIJ implementation. */
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/* Zeros the listed global rows (optionally setting the diagonal to 'diag').
   Rows may be owned by any process: each row index is routed to its owner with a
   hand-rolled Irecv/Isend rendezvous, then the owner zaps its local rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor:
     nprocs[2*j] = #rows destined for rank j, nprocs[2*j+1] = 1 if any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* packing above advanced starts[]; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter; convert global rows to local numbering */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr =
MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues may be freed safely */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
/* y = A*x: gather the full x into lvec, then a local SeqDense multiply. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
/* z = y + A*x, same gather-then-local-multiply pattern as MatMult. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
/* y = A'*x: local transpose multiply into lvec, then reverse-scatter with ADD. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr =
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
/* z = y + A'*x: copy y into z, then add the reverse-scattered local result. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/* Copies the local diagonal entries into v.  Entry for global row rstart+i is
   at column-major offset rstart*m + i*m + i in the local storage (lda == m). */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;   /* skip columns left of the diagonal block */
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/* Releases all MPIDense resources: stash, local block, scatter vec/ctx,
   any PLAPACK factorization state, and composed function slots. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* NOTE(review): the first two slots use ...FunctionDynamic while the MatMatMult
     slots use ...Function -- confirm the mix is intentional. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/* Writes the matrix to a binary viewer in PETSC_VIEWER_NATIVE dense format:
   rank 0 writes the header and its rows (transposed to row-major), then receives
   and writes each other rank's rows in turn. */
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows (storage is column-major, file is row-major) */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for(k = 1; k < size; k++) {
          v = vv;
          m = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          /* transpose rank k's column-major block into row-major 'work' */
          for(j = 0; j < N; j++) {
            for(i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr =
MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
/* Handles ASCII info formats directly; for full output assembles the whole
   matrix on rank 0 (see second half of the routine) and views it there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode        ierr;
  PetscMPIInt           size = mdn->size,rank = mdn->rank;
  const PetscViewerType vtype;
  PetscTruth            iascii,isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack           *lu;
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      /* report the global PLAPACK configuration (file-scope statics above) */
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
693 if (mat->factortype){ 694 lu=(Mat_Plapack*)(mat->spptr); 695 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 696 } 697 #else 698 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 699 #endif 700 PetscFunctionReturn(0); 701 } else if (format == PETSC_VIEWER_ASCII_INFO) { 702 PetscFunctionReturn(0); 703 } 704 } else if (isdraw) { 705 PetscDraw draw; 706 PetscTruth isnull; 707 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 710 if (isnull) PetscFunctionReturn(0); 711 } 712 713 if (size == 1) { 714 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 719 PetscInt *cols; 720 PetscScalar *vals; 721 722 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 723 if (!rank) { 724 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 725 } else { 726 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 727 } 728 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 729 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 730 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 731 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 732 733 /* Copy the matrix ... 
This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap->rstart; m = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows of A; view its sequential block */
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
/* Dispatches to the binary or ASCII/draw/socket viewer implementation. */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
/* Gathers matrix statistics: local stats come from the SeqDense block; global
   variants reduce (MAX or SUM) the five per-process quantities over the comm. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
/* Applies/records options; most structural options are forwarded to the local
   block, symmetry-type options are ignored (dense storage is insensitive to them). */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case
MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
/* A = diag(ll) * A * diag(rr).  Left scaling is row-local; right scaling first
   gathers rr into lvec since each process stores all columns of its rows. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;           /* row i: stride m through column-major storage */
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;         /* column i is contiguous */
for (j=0; j<m; j++) { (*v++) *= x;} 895 } 896 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatNorm_MPIDense" 904 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 905 { 906 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 907 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 908 PetscErrorCode ierr; 909 PetscInt i,j; 910 PetscReal sum = 0.0; 911 PetscScalar *v = mat->v; 912 913 PetscFunctionBegin; 914 if (mdn->size == 1) { 915 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 916 } else { 917 if (type == NORM_FROBENIUS) { 918 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 919 #if defined(PETSC_USE_COMPLEX) 920 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 921 #else 922 sum += (*v)*(*v); v++; 923 #endif 924 } 925 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 926 *nrm = sqrt(*nrm); 927 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 928 } else if (type == NORM_1) { 929 PetscReal *tmp,*tmp2; 930 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 931 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 932 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 933 *nrm = 0.0; 934 v = mat->v; 935 for (j=0; j<mdn->A->cmap->n; j++) { 936 for (i=0; i<mdn->A->rmap->n; i++) { 937 tmp[j] += PetscAbsScalar(*v); v++; 938 } 939 } 940 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 941 for (j=0; j<A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 945 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 949 ierr = 
MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 950 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatTranspose_MPIDense" 957 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 958 { 959 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 960 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 961 Mat B; 962 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 963 PetscErrorCode ierr; 964 PetscInt j,i; 965 PetscScalar *v; 966 967 PetscFunctionBegin; 968 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 969 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 970 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 972 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 973 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 974 } else { 975 B = *matout; 976 } 977 978 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 979 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 980 for (i=0; i<m; i++) rwork[i] = rstart + i; 981 for (j=0; j<n; j++) { 982 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 983 v += m; 984 } 985 ierr = PetscFree(rwork);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 989 *matout = B; 990 } else { 991 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ 
"MatSetUpPreallocation_MPIDense" 1002 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1003 { 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #if defined(PETSC_HAVE_PLAPACK) 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1015 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1016 { 1017 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1018 PetscErrorCode ierr; 1019 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1020 PetscScalar *array; 1021 PetscReal one = 1.0; 1022 1023 PetscFunctionBegin; 1024 /* Copy A into F->lu->A */ 1025 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1026 ierr = PLA_API_begin();CHKERRQ(ierr); 1027 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1028 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1029 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1030 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1031 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1032 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1033 ierr = PLA_API_end();CHKERRQ(ierr); 1034 lu->rstart = rstart; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1040 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1041 { 1042 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1043 PetscErrorCode ierr; 1044 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1045 PetscScalar *array; 1046 PetscReal one = 1.0; 1047 1048 PetscFunctionBegin; 1049 /* Copy F into A->lu->A */ 1050 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1051 ierr = PLA_API_begin();CHKERRQ(ierr); 1052 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1053 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1054 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1055 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1056 ierr = 
MatRestoreArray(A,&array);CHKERRQ(ierr); 1057 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1058 ierr = PLA_API_end();CHKERRQ(ierr); 1059 lu->rstart = rstart; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1065 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1066 { 1067 PetscErrorCode ierr; 1068 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1069 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1070 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1071 PLA_Obj alpha = NULL,beta = NULL; 1072 1073 PetscFunctionBegin; 1074 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1075 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1076 1077 /* 1078 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1079 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1080 */ 1081 1082 /* do the multiply in PLA */ 1083 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1084 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1085 CHKMEMQ; 1086 1087 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1088 CHKMEMQ; 1089 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1090 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1091 1092 /* 1093 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1094 */ 1095 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1101 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1102 { 1103 PetscErrorCode ierr; 1104 PetscInt m=A->rmap->n,n=B->cmap->n; 1105 Mat Cmat; 1106 1107 PetscFunctionBegin; 1108 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1109 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to 
apparent bugs in PLAPACK,this is not currently supported"); 1110 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1111 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1112 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1113 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1114 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 1116 *C = Cmat; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1122 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1123 { 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 if (scall == MAT_INITIAL_MATRIX){ 1128 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1129 } 1130 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatSolve_MPIDense" 1136 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1137 { 1138 MPI_Comm comm = ((PetscObject)A)->comm; 1139 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1140 PetscErrorCode ierr; 1141 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1142 PetscScalar *array; 1143 PetscReal one = 1.0; 1144 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1145 PLA_Obj v_pla = NULL; 1146 PetscScalar *loc_buf; 1147 Vec loc_x; 1148 1149 PetscFunctionBegin; 1150 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1151 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1152 1153 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1154 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1155 PLA_Obj_set_to_zero(v_pla); 1156 1157 /* Copy b into rhs_pla */ 1158 PLA_API_begin(); 1159 PLA_Obj_API_open(v_pla); 1160 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1161 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1162 ierr = 
VecRestoreArray(b,&array);CHKERRQ(ierr); 1163 PLA_Obj_API_close(v_pla); 1164 PLA_API_end(); 1165 1166 if (A->factortype == MAT_FACTOR_LU){ 1167 /* Apply the permutations to the right hand sides */ 1168 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1169 1170 /* Solve L y = b, overwriting b with y */ 1171 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1172 1173 /* Solve U x = y (=b), overwriting b with x */ 1174 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1175 } else { /* MAT_FACTOR_CHOLESKY */ 1176 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1177 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1178 PLA_NONUNIT_DIAG,lu->A,v_pla); 1179 } 1180 1181 /* Copy PLAPACK x into Petsc vector x */ 1182 PLA_Obj_local_length(v_pla, &loc_m); 1183 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1184 PLA_Obj_local_stride(v_pla, &loc_stride); 1185 /* 1186 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1187 */ 1188 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1189 if (!lu->pla_solved){ 1190 1191 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1192 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1193 1194 /* Create IS and cts for VecScatterring */ 1195 PLA_Obj_local_length(v_pla, &loc_m); 1196 PLA_Obj_local_stride(v_pla, &loc_stride); 1197 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1198 1199 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1200 for (i=0; i<loc_m; i+=lu->nb){ 1201 j = 0; 1202 while (j < lu->nb && i+j < loc_m){ 1203 idx_petsc[i+j] = rstart + j; j++; 1204 } 1205 rstart += size*lu->nb; 1206 } 1207 1208 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1209 1210 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1212 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1213 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1214 } 1215 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 1218 /* Free data */ 1219 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1220 PLA_Obj_free(&v_pla); 1221 1222 lu->pla_solved = PETSC_TRUE; 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1228 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1229 { 1230 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1231 PetscErrorCode ierr; 1232 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1233 PetscInt info_pla=0; 1234 PetscScalar *array,one = 1.0; 1235 1236 PetscFunctionBegin; 1237 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1238 PLA_Obj_free(&lu->A); 1239 PLA_Obj_free (&lu->pivots); 1240 } 1241 /* Create PLAPACK matrix object */ 1242 lu->A = NULL; lu->pivots = NULL; 1243 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1244 PLA_Obj_set_to_zero(lu->A); 1245 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1246 1247 /* Copy A into lu->A */ 1248 PLA_API_begin(); 1249 PLA_Obj_API_open(lu->A); 1250 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1251 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1252 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1253 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1254 PLA_Obj_API_close(lu->A); 1255 PLA_API_end(); 1256 1257 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1258 info_pla = PLA_LU(lu->A,lu->pivots); 1259 if (info_pla != 0) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1260 1261 lu->rstart = rstart; 1262 lu->mstruct = SAME_NONZERO_PATTERN; 1263 F->ops->solve = MatSolve_MPIDense; 1264 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1270 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1271 { 1272 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1273 PetscErrorCode ierr; 1274 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1275 PetscInt info_pla=0; 1276 PetscScalar *array,one = 1.0; 1277 1278 PetscFunctionBegin; 1279 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1280 PLA_Obj_free(&lu->A); 1281 } 1282 /* Create PLAPACK matrix object */ 1283 lu->A = NULL; 1284 lu->pivots = NULL; 1285 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1286 1287 /* Copy A into lu->A */ 1288 PLA_API_begin(); 1289 PLA_Obj_API_open(lu->A); 1290 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1291 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1292 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1293 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1294 PLA_Obj_API_close(lu->A); 1295 PLA_API_end(); 1296 1297 /* Factor P A -> Chol */ 1298 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1299 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1300 1301 lu->rstart = rstart; 1302 lu->mstruct = SAME_NONZERO_PATTERN; 1303 F->ops->solve = MatSolve_MPIDense; 1304 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* Note the Petsc perm permutation is ignored */ 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1311 PetscErrorCode 
MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1312 { 1313 PetscErrorCode ierr; 1314 PetscTruth issymmetric,set; 1315 1316 PetscFunctionBegin; 1317 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1318 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1319 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* Note the Petsc r and c permutations are ignored */ 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1326 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1327 { 1328 PetscErrorCode ierr; 1329 PetscInt M = A->rmap->N; 1330 Mat_Plapack *lu; 1331 1332 PetscFunctionBegin; 1333 lu = (Mat_Plapack*)F->spptr; 1334 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1335 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 EXTERN_C_BEGIN 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1342 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1343 { 1344 PetscFunctionBegin; 1345 *type = MATSOLVERPLAPACK; 1346 PetscFunctionReturn(0); 1347 } 1348 EXTERN_C_END 1349 1350 EXTERN_C_BEGIN 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1353 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1354 { 1355 PetscErrorCode ierr; 1356 Mat_Plapack *lu; 1357 PetscMPIInt size; 1358 PetscInt M=A->rmap->N; 1359 1360 PetscFunctionBegin; 1361 /* Create the factorization matrix */ 1362 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1363 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1364 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1365 ierr = 
PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1366 (*F)->spptr = (void*)lu; 1367 1368 /* Set default Plapack parameters */ 1369 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1370 lu->nb = M/size; 1371 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1372 1373 /* Set runtime options */ 1374 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1375 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1376 PetscOptionsEnd(); 1377 1378 /* Create object distribution template */ 1379 lu->templ = NULL; 1380 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1381 1382 /* Set the datatype */ 1383 #if defined(PETSC_USE_COMPLEX) 1384 lu->datatype = MPI_DOUBLE_COMPLEX; 1385 #else 1386 lu->datatype = MPI_DOUBLE; 1387 #endif 1388 1389 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1390 1391 1392 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1393 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1394 1395 if (ftype == MAT_FACTOR_LU) { 1396 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1397 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1398 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1399 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1400 (*F)->factortype = ftype; 1401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 EXTERN_C_END 1405 #endif 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1409 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1410 { 1411 #if defined(PETSC_HAVE_PLAPACK) 1412 
PetscErrorCode ierr; 1413 #endif 1414 1415 PetscFunctionBegin; 1416 #if defined(PETSC_HAVE_PLAPACK) 1417 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1418 #else 1419 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1420 #endif 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatAXPY_MPIDense" 1426 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1427 { 1428 PetscErrorCode ierr; 1429 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1430 1431 PetscFunctionBegin; 1432 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatConjugate_MPIDense" 1438 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1439 { 1440 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1441 PetscErrorCode ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatRealPart_MPIDense" 1450 PetscErrorCode MatRealPart_MPIDense(Mat A) 1451 { 1452 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1453 PetscErrorCode ierr; 1454 1455 PetscFunctionBegin; 1456 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1462 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1463 { 1464 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 /* -------------------------------------------------------------------*/ 1473 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1474 MatGetRow_MPIDense, 1475 MatRestoreRow_MPIDense, 1476 MatMult_MPIDense, 1477 /* 4*/ MatMultAdd_MPIDense, 1478 
MatMultTranspose_MPIDense, 1479 MatMultTransposeAdd_MPIDense, 1480 0, 1481 0, 1482 0, 1483 /*10*/ 0, 1484 0, 1485 0, 1486 0, 1487 MatTranspose_MPIDense, 1488 /*15*/ MatGetInfo_MPIDense, 1489 MatEqual_MPIDense, 1490 MatGetDiagonal_MPIDense, 1491 MatDiagonalScale_MPIDense, 1492 MatNorm_MPIDense, 1493 /*20*/ MatAssemblyBegin_MPIDense, 1494 MatAssemblyEnd_MPIDense, 1495 MatSetOption_MPIDense, 1496 MatZeroEntries_MPIDense, 1497 /*24*/ MatZeroRows_MPIDense, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /*29*/ MatSetUpPreallocation_MPIDense, 1503 0, 1504 0, 1505 MatGetArray_MPIDense, 1506 MatRestoreArray_MPIDense, 1507 /*34*/ MatDuplicate_MPIDense, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /*39*/ MatAXPY_MPIDense, 1513 MatGetSubMatrices_MPIDense, 1514 0, 1515 MatGetValues_MPIDense, 1516 0, 1517 /*44*/ 0, 1518 MatScale_MPIDense, 1519 0, 1520 0, 1521 0, 1522 /*49*/ 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*54*/ 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 /*59*/ MatGetSubMatrix_MPIDense, 1533 MatDestroy_MPIDense, 1534 MatView_MPIDense, 1535 0, 1536 0, 1537 /*64*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*69*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*74*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*79*/ 0, 1553 0, 1554 0, 1555 0, 1556 /*83*/ MatLoad_MPIDense, 1557 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*89*/ 1563 #if defined(PETSC_HAVE_PLAPACK) 1564 MatMatMult_MPIDense_MPIDense, 1565 MatMatMultSymbolic_MPIDense_MPIDense, 1566 MatMatMultNumeric_MPIDense_MPIDense, 1567 #else 1568 0, 1569 0, 1570 0, 1571 #endif 1572 0, 1573 0, 1574 /*94*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*99*/ 0, 1580 0, 1581 0, 1582 MatConjugate_MPIDense, 1583 0, 1584 /*104*/0, 1585 MatRealPart_MPIDense, 1586 MatImaginaryPart_MPIDense, 1587 0 1588 }; 1589 1590 EXTERN_C_BEGIN 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1593 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1594 { 1595 Mat_MPIDense *a; 1596 PetscErrorCode ierr; 1597 
1598 PetscFunctionBegin; 1599 mat->preallocated = PETSC_TRUE; 1600 /* Note: For now, when data is specified above, this assumes the user correctly 1601 allocates the local dense storage space. We should add error checking. */ 1602 1603 a = (Mat_MPIDense*)mat->data; 1604 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1605 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1606 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1607 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1608 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1609 PetscFunctionReturn(0); 1610 } 1611 EXTERN_C_END 1612 1613 /*MC 1614 MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1615 1616 run ./configure with the option --download-plapack 1617 1618 1619 Options Database Keys: 1620 . -mat_plapack_nprows <n> - number of rows in processor partition 1621 . -mat_plapack_npcols <n> - number of columns in processor partition 1622 . -mat_plapack_nb <n> - block size of template vector 1623 . 
-mat_plapack_nb_alg <n> - algorithmic block size 1624 - -mat_plapack_ckerror <n> - error checking flag 1625 1626 Level: intermediate 1627 1628 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1629 1630 M*/ 1631 1632 EXTERN_C_BEGIN 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatCreate_MPIDense" 1635 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1636 { 1637 Mat_MPIDense *a; 1638 PetscErrorCode ierr; 1639 1640 PetscFunctionBegin; 1641 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1642 mat->data = (void*)a; 1643 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1644 mat->mapping = 0; 1645 1646 mat->insertmode = NOT_SET_VALUES; 1647 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1648 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1649 1650 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1651 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1652 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1653 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1654 a->nvec = mat->cmap->n; 1655 1656 /* build cache for off array entries formed */ 1657 a->donotstash = PETSC_FALSE; 1658 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1659 1660 /* stuff used for matrix vector multiply */ 1661 a->lvec = 0; 1662 a->Mvctx = 0; 1663 a->roworiented = PETSC_TRUE; 1664 1665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1666 "MatGetDiagonalBlock_MPIDense", 1667 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1669 "MatMPIDenseSetPreallocation_MPIDense", 1670 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1672 "MatMatMult_MPIAIJ_MPIDense", 1673 
MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1674 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1675 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1676 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1677 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1678 "MatMatMultNumeric_MPIAIJ_MPIDense", 1679 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1680 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1681 "MatGetFactor_mpidense_petsc", 1682 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1683 #if defined(PETSC_HAVE_PLAPACK) 1684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1685 "MatGetFactor_mpidense_plapack", 1686 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1687 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1688 #endif 1689 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1690 1691 PetscFunctionReturn(0); 1692 } 1693 EXTERN_C_END 1694 1695 /*MC 1696 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1697 1698 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1699 and MATMPIDENSE otherwise. 1700 1701 Options Database Keys: 1702 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1703 1704 Level: beginner 1705 1706 1707 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1708 M*/ 1709 1710 EXTERN_C_BEGIN 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatCreate_Dense" 1713 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1714 { 1715 PetscErrorCode ierr; 1716 PetscMPIInt size; 1717 1718 PetscFunctionBegin; 1719 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1720 if (size == 1) { 1721 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1722 } else { 1723 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 EXTERN_C_END 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1731 /*@C 1732 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1733 1734 Not collective 1735 1736 Input Parameters: 1737 . A - the matrix 1738 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1739 to control all matrix memory allocation. 1740 1741 Notes: 1742 The dense format is fully compatible with standard Fortran 77 1743 storage by columns. 1744 1745 The data input variable is intended primarily for Fortran programmers 1746 who wish to allocate their own matrix memory space. Most users should 1747 set data=PETSC_NULL. 
1748 1749 Level: intermediate 1750 1751 .keywords: matrix,dense, parallel 1752 1753 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1754 @*/ 1755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1756 { 1757 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1758 1759 PetscFunctionBegin; 1760 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1761 if (f) { 1762 ierr = (*f)(mat,data);CHKERRQ(ierr); 1763 } 1764 PetscFunctionReturn(0); 1765 } 1766 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatCreateMPIDense" 1769 /*@C 1770 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1771 1772 Collective on MPI_Comm 1773 1774 Input Parameters: 1775 + comm - MPI communicator 1776 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1777 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1778 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1779 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1780 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1781 to control all matrix memory allocation. 1782 1783 Output Parameter: 1784 . A - the matrix 1785 1786 Notes: 1787 The dense format is fully compatible with standard Fortran 77 1788 storage by columns. 1789 1790 The data input variable is intended primarily for Fortran programmers 1791 who wish to allocate their own matrix memory space. Most users should 1792 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1793 1794 The user MUST specify either the local or global matrix dimensions 1795 (possibly both). 
1796 1797 Level: intermediate 1798 1799 .keywords: matrix,dense, parallel 1800 1801 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1802 @*/ 1803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1804 { 1805 PetscErrorCode ierr; 1806 PetscMPIInt size; 1807 1808 PetscFunctionBegin; 1809 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1810 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1811 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1812 if (size > 1) { 1813 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1814 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1815 } else { 1816 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1817 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1818 } 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatDuplicate_MPIDense" 1824 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1825 { 1826 Mat mat; 1827 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 *newmat = 0; 1832 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1833 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1834 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1835 a = (Mat_MPIDense*)mat->data; 1836 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1837 mat->factortype = A->factortype; 1838 mat->assembled = PETSC_TRUE; 1839 mat->preallocated = PETSC_TRUE; 1840 1841 mat->rmap->rstart = A->rmap->rstart; 1842 mat->rmap->rend = A->rmap->rend; 1843 a->size = oldmat->size; 1844 a->rank = oldmat->rank; 1845 mat->insertmode = NOT_SET_VALUES; 1846 a->nvec = oldmat->nvec; 1847 a->donotstash = oldmat->donotstash; 1848 1849 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1850 ierr = 
PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1851 1852 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1853 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1854 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1855 1856 *newmat = mat; 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1862 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1863 { 1864 PetscErrorCode ierr; 1865 PetscMPIInt rank,size; 1866 PetscInt *rowners,i,m,nz,j; 1867 PetscScalar *array,*vals,*vals_ptr; 1868 MPI_Status status; 1869 1870 PetscFunctionBegin; 1871 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1872 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1873 1874 /* determine ownership of all rows */ 1875 m = M/size + ((M % size) > rank); 1876 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1877 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1878 rowners[0] = 0; 1879 for (i=2; i<=size; i++) { 1880 rowners[i] += rowners[i-1]; 1881 } 1882 1883 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1884 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1885 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1886 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1887 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1888 1889 if (!rank) { 1890 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1891 1892 /* read in my part of the matrix numerical values */ 1893 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1894 1895 /* insert into matrix-by row (this is why cannot directly read into array */ 1896 vals_ptr = vals; 1897 for (i=0; i<m; i++) { 1898 for (j=0; j<N; j++) { 1899 array[i + j*m] = *vals_ptr++; 1900 } 1901 } 1902 1903 /* read in other processors and ship out */ 1904 for (i=1; i<size; 
i++) { 1905 nz = (rowners[i+1] - rowners[i])*N; 1906 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1907 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1908 } 1909 } else { 1910 /* receive numeric values */ 1911 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1912 1913 /* receive message of values*/ 1914 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1915 1916 /* insert into matrix-by row (this is why cannot directly read into array */ 1917 vals_ptr = vals; 1918 for (i=0; i<m; i++) { 1919 for (j=0; j<N; j++) { 1920 array[i + j*m] = *vals_ptr++; 1921 } 1922 } 1923 } 1924 ierr = PetscFree(rowners);CHKERRQ(ierr); 1925 ierr = PetscFree(vals);CHKERRQ(ierr); 1926 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1927 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatLoad_MPIDense" 1933 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1934 { 1935 Mat A; 1936 PetscScalar *vals,*svals; 1937 MPI_Comm comm = ((PetscObject)viewer)->comm; 1938 MPI_Status status; 1939 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1940 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1941 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1942 PetscInt i,nz,j,rstart,rend; 1943 int fd; 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1948 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1949 if (!rank) { 1950 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1951 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1952 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1953 } 1954 1955 ierr = 
MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows (block distribution, remainder rows to
     the lowest ranks; rowners[] becomes prefix-summed ownership fences) */
  m = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array: fence i is where rank i's rows start */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity check: received count must match what the row lengths predicted */
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix (ourlens is temporarily reduced to the diagonal-block
     counts for preallocation, then restored for the insertion loops below) */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
/* Tests two parallel dense matrices for equality: compares the local blocks
   and ANDs the result over the communicator so all ranks agree. */
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense *matB =
(Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* NOTE(review): reduces a PetscTruth with MPI_INT/MPI_LAND, which assumes
     sizeof(PetscTruth) == sizeof(int) -- confirm on unusual ABIs */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is called.

  Input Parameter:
.   comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition if
  PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators
  cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscSysInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PLAPACK keeps global state, so run the initialization exactly once */
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    /* default 2D process mesh: a single row containing every process */
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
      /* full error checking by default in debug builds, none otherwise */
      Plapack_ierror = 3;
#else
      Plapack_ierror = 0;
#endif
      ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_ierror){
        ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
      } else {
        ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
      }

      /* 0 leaves PLAPACK's built-in algorithmic block size untouched */
      Plapack_nb_alg = 0;
      ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_nb_alg) {
        ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
      }
    PetscOptionsEnd();

    /* map the 1D communicator onto the requested 2D mesh and hand it to PLAPACK */
    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif