1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscTruth pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscTruth flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 211 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 212 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 213 214 /* No parallel redistribution currently supported! Should really check each index set 215 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 216 original matrix! */ 217 218 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 219 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 220 221 /* Check submatrix call */ 222 if (scall == MAT_REUSE_MATRIX) { 223 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 224 /* Really need to test rows and column sizes! */ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = addv; /* in case this processor had no cache */ 279 280 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 281 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 282 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 289 { 290 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 291 PetscErrorCode ierr; 292 PetscInt i,*row,*col,flg,j,rstart,ncols; 293 PetscMPIInt n; 294 PetscScalar *val; 295 InsertMode addv=mat->insertmode; 296 297 PetscFunctionBegin; 298 /* wait on receives */ 299 while (1) { 300 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscTruth found; 358 359 PetscFunctionBegin; 360 /* first count number of contributors to each processor */ 361 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 362 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 363 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 364 for (i=0; i<N; i++) { 365 idx = rows[i]; 366 found = PETSC_FALSE; 367 for (j=0; j<size; j++) { 368 if (idx >= owners[j] && idx < owners[j+1]) { 369 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 370 } 371 } 372 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 373 } 374 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 375 376 /* inform other processors of number of messages and max length*/ 377 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 378 379 /* post receives: */ 380 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 381 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 382 for (i=0; i<nrecvs; i++) { 383 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 391 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 392 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 393 starts[0] = 0; 394 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 395 for (i=0; i<N; i++) { 396 svalues[starts[owner[i]]++] = rows[i]; 397 } 398 399 starts[0] = 0; 400 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 401 count = 0; 402 for (i=0; i<size; i++) { 403 if (nprocs[2*i+1]) { 404 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscFree(starts);CHKERRQ(ierr); 408 409 base = owners[rank]; 410 411 /* wait on receives */ 412 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 413 count = nrecvs; 414 slen = 0; 415 while (count) { 416 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 417 /* unpack receives into our local space */ 418 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 419 source[imdex] = recv_status.MPI_SOURCE; 420 lens[imdex] = n; 421 slen += n; 422 count--; 423 } 424 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 425 426 /* move the data into the send scatter */ 427 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 428 count = 0; 429 for (i=0; i<nrecvs; i++) { 430 values = rvalues + i*nmax; 431 for (j=0; j<lens[i]; j++) { 432 lrows[count++] = values[j] - base; 433 } 434 } 435 ierr = PetscFree(rvalues);CHKERRQ(ierr); 436 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 437 ierr = PetscFree(owner);CHKERRQ(ierr); 438 ierr = PetscFree(nprocs);CHKERRQ(ierr); 439 440 /* actually zap the local rows */ 441 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 442 ierr = PetscFree(lrows);CHKERRQ(ierr); 443 444 /* wait on sends */ 445 if (nsends) { 446 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 447 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 448 ierr = PetscFree(send_status);CHKERRQ(ierr); 449 } 450 ierr = PetscFree(send_waits);CHKERRQ(ierr); 451 ierr = PetscFree(svalues);CHKERRQ(ierr); 452 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_MPIDense" 458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMultAdd_MPIDense" 472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultTranspose_MPIDense" 486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 487 { 488 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 489 PetscErrorCode ierr; 490 PetscScalar zero = 0.0; 491 492 PetscFunctionBegin; 493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 494 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 509 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 510 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNCT__ 516 #define __FUNCT__ "MatGetDiagonal_MPIDense" 517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 521 PetscErrorCode ierr; 522 PetscInt len,i,n,m = A->rmap->n,radd; 523 PetscScalar *x,zero = 0.0; 524 525 PetscFunctionBegin; 526 ierr = VecSet(v,zero);CHKERRQ(ierr); 527 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 528 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 529 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 530 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 531 radd = A->rmap->rstart*m; 532 for (i=0; i<len; i++) { 533 x[i] = aloc->v[radd + i*m + i]; 534 } 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatDestroy_MPIDense" 541 PetscErrorCode MatDestroy_MPIDense(Mat mat) 542 { 543 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 544 PetscErrorCode ierr; 545 #if defined(PETSC_HAVE_PLAPACK) 546 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 547 #endif 548 549 PetscFunctionBegin; 550 551 #if defined(PETSC_USE_LOG) 552 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 553 #endif 554 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 556 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 557 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 558 #if defined(PETSC_HAVE_PLAPACK) 559 if (lu) { 560 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 561 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 562 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 563 564 if (lu->is_pla) { 565 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 566 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 567 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 568 } 569 } 570 #endif 571 572 ierr = PetscFree(mdn);CHKERRQ(ierr); 573 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 MPI_Status status; 595 596 PetscFunctionBegin; 597 if (mdn->size == 1) { 598 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 601 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 602 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 603 604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 605 if (format == PETSC_VIEWER_NATIVE) { 606 607 if (!rank) { 608 /* store the matrix as a dense matrix */ 609 header[0] = MAT_FILE_CLASSID; 610 header[1] = mat->rmap->N; 611 header[2] = N; 612 header[3] = MATRIX_BINARY_FORMAT_DENSE; 613 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 614 615 /* get largest work array needed for transposing array */ 616 mmax = mat->rmap->n; 617 for (i=1; i<size; i++) { 618 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 619 } 620 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 621 622 /* write out local array, by rows */ 623 m = mat->rmap->n; 624 v = a->v; 625 for (j=0; j<N; j++) { 626 for (i=0; i<m; i++) { 627 work[j + i*N] = *v++; 628 } 629 } 630 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 631 /* get largest work array to receive messages from other processes, excludes process zero */ 632 mmax = 0; 633 for (i=1; i<size; i++) { 634 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 635 } 636 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 637 for(k = 1; k < size; k++) { 638 v = vv; 639 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 640 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 641 642 for(j = 0; j < N; j++) { 643 for(i = 0; i < m; i++) { 644 work[j + i*N] = *v++; 645 } 646 } 647 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 648 } 649 ierr = PetscFree(work);CHKERRQ(ierr); 650 ierr = PetscFree(vv);CHKERRQ(ierr); 651 } else { 652 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 } 654 } else { 655 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 666 PetscErrorCode ierr; 667 PetscMPIInt size = mdn->size,rank = mdn->rank; 668 const PetscViewerType vtype; 669 PetscTruth iascii,isdraw; 670 PetscViewer sviewer; 671 PetscViewerFormat format; 672 #if defined(PETSC_HAVE_PLAPACK) 673 Mat_Plapack *lu; 674 #endif 675 676 PetscFunctionBegin; 677 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 678 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 679 if (iascii) { 680 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 681 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 682 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 683 MatInfo info; 684 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 686 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 688 #if defined(PETSC_HAVE_PLAPACK) 689 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 691 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 692 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 693 if (mat->factortype){ 694 lu=(Mat_Plapack*)(mat->spptr); 695 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 696 } 697 #else 698 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 699 #endif 700 PetscFunctionReturn(0); 701 } else if (format == PETSC_VIEWER_ASCII_INFO) { 702 PetscFunctionReturn(0); 703 } 704 } else if (isdraw) { 705 PetscDraw draw; 706 PetscTruth isnull; 707 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 710 if (isnull) PetscFunctionReturn(0); 711 } 712 713 if (size == 1) { 714 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 719 PetscInt *cols; 720 PetscScalar *vals; 721 722 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 723 if (!rank) { 724 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 725 } else { 726 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 727 } 728 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 729 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 730 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 731 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 732 733 /* Copy the matrix ... This isn't the most efficient means, 734 but it's quick for now */ 735 A->insertmode = INSERT_VALUES; 736 row = mat->rmap->rstart; m = mdn->A->rmap->n; 737 for (i=0; i<m; i++) { 738 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 739 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 740 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 741 row++; 742 } 743 744 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 747 if (!rank) { 748 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 749 } 750 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 751 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 752 ierr = MatDestroy(A);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatView_MPIDense" 759 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 760 { 761 PetscErrorCode ierr; 762 PetscTruth iascii,isbinary,isdraw,issocket; 763 764 PetscFunctionBegin; 765 766 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 767 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 768 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 769 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 770 771 if (iascii || issocket || isdraw) { 772 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 773 } else if (isbinary) { 774 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 775 } else { 776 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 777 } 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatGetInfo_MPIDense" 783 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 784 { 785 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 786 Mat mdn = mat->A; 787 PetscErrorCode ierr; 788 PetscReal isend[5],irecv[5]; 789 790 PetscFunctionBegin; 791 info->block_size = 1.0; 792 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 793 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 794 isend[3] = info->memory; isend[4] = info->mallocs; 795 if (flag == MAT_LOCAL) { 796 info->nz_used = isend[0]; 797 info->nz_allocated = isend[1]; 798 info->nz_unneeded = isend[2]; 799 info->memory = isend[3]; 800 info->mallocs = isend[4]; 801 } else if (flag == MAT_GLOBAL_MAX) { 802 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 803 info->nz_used = irecv[0]; 804 info->nz_allocated = irecv[1]; 805 info->nz_unneeded = irecv[2]; 806 info->memory = irecv[3]; 807 info->mallocs = irecv[4]; 808 } else if (flag == MAT_GLOBAL_SUM) { 809 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 810 info->nz_used = irecv[0]; 811 info->nz_allocated = irecv[1]; 812 info->nz_unneeded = irecv[2]; 813 info->memory = irecv[3]; 814 info->mallocs = irecv[4]; 815 } 816 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 817 info->fill_ratio_needed = 0; 818 info->factor_mallocs = 0; 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatSetOption_MPIDense" 824 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 825 { 826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 switch (op) { 831 case MAT_NEW_NONZERO_LOCATIONS: 832 case MAT_NEW_NONZERO_LOCATION_ERR: 833 case MAT_NEW_NONZERO_ALLOCATION_ERR: 834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 835 break; 836 case MAT_ROW_ORIENTED: 837 a->roworiented = flg; 838 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 839 break; 840 case MAT_NEW_DIAGONALS: 841 case MAT_USE_HASH_TABLE: 842 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 843 break; 844 case MAT_IGNORE_OFF_PROC_ENTRIES: 845 a->donotstash = flg; 846 break; 847 case MAT_SYMMETRIC: 848 case MAT_STRUCTURALLY_SYMMETRIC: 849 case MAT_HERMITIAN: 850 case MAT_SYMMETRY_ETERNAL: 851 case MAT_IGNORE_LOWER_TRIANGULAR: 852 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 853 break; 854 default: 855 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatDiagonalScale_MPIDense" 863 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 864 { 865 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 866 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 867 PetscScalar *l,*r,x,*v; 868 PetscErrorCode ierr; 869 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 870 871 PetscFunctionBegin; 872 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 873 if (ll) { 874 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 875 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 876 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 877 for (i=0; i<m; i++) { 878 x = l[i]; 879 v = mat->v + i; 880 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 881 } 882 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 883 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 884 } 885 if (rr) { 886 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 887 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 888 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 891 for (i=0; i<n; i++) { 892 x = r[i]; 893 v = mat->v + i*m; 894 for (j=0; j<m; j++) { (*v++) *= x;} 895 } 896 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatNorm_MPIDense" 904 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 905 { 906 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 907 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 908 PetscErrorCode ierr; 909 PetscInt i,j; 910 PetscReal sum = 0.0; 911 PetscScalar *v = mat->v; 912 913 PetscFunctionBegin; 914 if (mdn->size == 1) { 915 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 916 } else { 917 if (type == NORM_FROBENIUS) { 918 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 919 #if defined(PETSC_USE_COMPLEX) 920 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 921 #else 922 sum += (*v)*(*v); v++; 923 #endif 924 } 925 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 926 *nrm = sqrt(*nrm); 927 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 928 } else if (type == NORM_1) { 929 PetscReal *tmp,*tmp2; 930 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 931 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 932 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 933 *nrm = 0.0; 934 v = mat->v; 935 for (j=0; j<mdn->A->cmap->n; j++) { 936 for (i=0; i<mdn->A->rmap->n; i++) { 937 tmp[j] += PetscAbsScalar(*v); v++; 938 } 939 } 940 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 941 for (j=0; j<A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 945 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 949 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 950 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatTranspose_MPIDense" 957 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 958 { 959 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 960 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 961 Mat B; 962 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 963 PetscErrorCode ierr; 964 PetscInt j,i; 965 PetscScalar *v; 966 967 PetscFunctionBegin; 968 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 969 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 970 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 972 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 973 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 974 } else { 975 B = *matout; 976 } 977 978 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 979 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 980 for (i=0; i<m; i++) rwork[i] = rstart + i; 981 for (j=0; j<n; j++) { 982 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 983 v += m; 984 } 985 ierr = PetscFree(rwork);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 989 *matout = B; 990 } else { 991 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1002 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1003 { 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #if defined(PETSC_HAVE_PLAPACK) 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1015 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1016 { 1017 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1018 PetscErrorCode ierr; 1019 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1020 PetscScalar *array; 1021 PetscReal one = 1.0; 1022 1023 PetscFunctionBegin; 1024 /* Copy A into F->lu->A */ 1025 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1026 ierr = PLA_API_begin();CHKERRQ(ierr); 1027 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1028 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1029 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1030 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1031 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1032 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1033 ierr = PLA_API_end();CHKERRQ(ierr); 1034 lu->rstart = rstart; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1040 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1041 { 1042 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1043 PetscErrorCode ierr; 1044 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1045 PetscScalar *array; 1046 PetscReal one = 1.0; 1047 1048 PetscFunctionBegin; 1049 /* Copy F into A->lu->A */ 1050 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1051 ierr = PLA_API_begin();CHKERRQ(ierr); 1052 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1053 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1054 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1055 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1056 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1057 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1058 ierr = PLA_API_end();CHKERRQ(ierr); 1059 lu->rstart = rstart; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1065 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1066 { 1067 PetscErrorCode ierr; 1068 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1069 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1070 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1071 PLA_Obj alpha = NULL,beta = NULL; 1072 1073 PetscFunctionBegin; 1074 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1075 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1076 1077 /* 1078 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1079 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1080 */ 1081 1082 /* do the multiply in PLA */ 1083 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1084 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1085 CHKMEMQ; 1086 1087 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1088 CHKMEMQ; 1089 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1090 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1091 1092 /* 1093 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1094 */ 1095 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1101 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1102 { 1103 PetscErrorCode ierr; 1104 PetscInt m=A->rmap->n,n=B->cmap->n; 1105 Mat Cmat; 1106 1107 PetscFunctionBegin; 1108 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1109 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1110 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1111 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1112 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1113 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1114 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 1116 *C = Cmat; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1122 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1123 { 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 if (scall == MAT_INITIAL_MATRIX){ 1128 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1129 } 1130 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatSolve_MPIDense" 1136 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1137 { 1138 MPI_Comm comm = ((PetscObject)A)->comm; 1139 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1140 PetscErrorCode ierr; 1141 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1142 PetscScalar *array; 1143 PetscReal one = 1.0; 1144 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1145 PLA_Obj v_pla = NULL; 1146 PetscScalar *loc_buf; 1147 Vec loc_x; 1148 1149 PetscFunctionBegin; 1150 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1151 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1152 1153 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1154 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1155 PLA_Obj_set_to_zero(v_pla); 1156 1157 /* Copy b into rhs_pla */ 1158 PLA_API_begin(); 1159 PLA_Obj_API_open(v_pla); 1160 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1161 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1162 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1163 PLA_Obj_API_close(v_pla); 1164 PLA_API_end(); 1165 1166 if (A->factortype == MAT_FACTOR_LU){ 1167 /* Apply the permutations to the right hand sides */ 1168 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1169 1170 /* Solve L y = b, overwriting b with y */ 1171 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1172 1173 /* Solve U x = y (=b), overwriting b with x */ 1174 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1175 } else { /* MAT_FACTOR_CHOLESKY */ 1176 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1177 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1178 PLA_NONUNIT_DIAG,lu->A,v_pla); 1179 } 1180 1181 /* Copy PLAPACK x into Petsc vector x */ 1182 PLA_Obj_local_length(v_pla, &loc_m); 1183 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1184 PLA_Obj_local_stride(v_pla, &loc_stride); 1185 /* 1186 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1187 */ 1188 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1189 if (!lu->pla_solved){ 1190 1191 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1192 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1193 1194 /* Create IS and cts for VecScatterring */ 1195 PLA_Obj_local_length(v_pla, &loc_m); 1196 PLA_Obj_local_stride(v_pla, &loc_stride); 1197 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1198 1199 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1200 for (i=0; i<loc_m; i+=lu->nb){ 1201 j = 0; 1202 while (j < lu->nb && i+j < loc_m){ 1203 idx_petsc[i+j] = rstart + j; j++; 1204 } 1205 rstart += size*lu->nb; 1206 } 1207 1208 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1209 1210 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1212 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1213 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1214 } 1215 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 1218 /* Free data */ 1219 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1220 PLA_Obj_free(&v_pla); 1221 1222 lu->pla_solved = PETSC_TRUE; 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1228 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1229 { 1230 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1231 PetscErrorCode ierr; 1232 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1233 PetscInt info_pla=0; 1234 PetscScalar *array,one = 1.0; 1235 1236 PetscFunctionBegin; 1237 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1238 PLA_Obj_free(&lu->A); 1239 PLA_Obj_free (&lu->pivots); 1240 } 1241 /* Create PLAPACK matrix object */ 1242 lu->A = NULL; lu->pivots = NULL; 1243 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1244 PLA_Obj_set_to_zero(lu->A); 1245 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1246 1247 /* Copy A into lu->A */ 1248 PLA_API_begin(); 1249 PLA_Obj_API_open(lu->A); 1250 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1251 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1252 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1253 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1254 PLA_Obj_API_close(lu->A); 1255 PLA_API_end(); 1256 1257 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1258 info_pla = PLA_LU(lu->A,lu->pivots); 1259 if (info_pla != 0) 1260 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1261 1262 lu->rstart = rstart; 1263 lu->mstruct = SAME_NONZERO_PATTERN; 1264 F->ops->solve = MatSolve_MPIDense; 1265 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1271 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1272 { 1273 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1274 PetscErrorCode ierr; 1275 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1276 PetscInt info_pla=0; 1277 PetscScalar *array,one = 1.0; 1278 1279 PetscFunctionBegin; 1280 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1281 PLA_Obj_free(&lu->A); 1282 } 1283 /* Create PLAPACK matrix object */ 1284 lu->A = NULL; 1285 lu->pivots = NULL; 1286 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1287 1288 /* Copy A into lu->A */ 1289 PLA_API_begin(); 1290 PLA_Obj_API_open(lu->A); 1291 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1292 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1293 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1294 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1295 PLA_Obj_API_close(lu->A); 1296 PLA_API_end(); 1297 1298 /* Factor P A -> Chol */ 1299 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1300 if (info_pla != 0) 1301 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1302 1303 lu->rstart = rstart; 1304 lu->mstruct = SAME_NONZERO_PATTERN; 1305 F->ops->solve = MatSolve_MPIDense; 1306 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1307 PetscFunctionReturn(0); 1308 } 1309 1310 /* Note the Petsc perm permutation is ignored */ 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1313 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1314 { 1315 PetscErrorCode ierr; 1316 PetscTruth issymmetric,set; 1317 1318 PetscFunctionBegin; 1319 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1320 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1321 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1322 PetscFunctionReturn(0); 1323 } 1324 1325 /* Note the Petsc r and c permutations are ignored */ 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1328 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1329 { 1330 PetscErrorCode ierr; 1331 PetscInt M = A->rmap->N; 1332 Mat_Plapack *lu; 1333 1334 PetscFunctionBegin; 1335 lu = (Mat_Plapack*)F->spptr; 1336 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1337 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1338 PetscFunctionReturn(0); 1339 } 1340 1341 EXTERN_C_BEGIN 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1344 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1345 { 1346 PetscFunctionBegin; 1347 *type = MAT_SOLVER_PLAPACK; 1348 PetscFunctionReturn(0); 1349 } 1350 EXTERN_C_END 1351 1352 EXTERN_C_BEGIN 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1355 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1356 { 1357 PetscErrorCode ierr; 1358 Mat_Plapack *lu; 1359 PetscMPIInt size; 1360 PetscInt M=A->rmap->N; 1361 1362 PetscFunctionBegin; 1363 /* Create the factorization matrix */ 1364 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1365 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1366 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1367 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1368 (*F)->spptr = (void*)lu; 1369 1370 /* Set default Plapack parameters */ 1371 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1372 lu->nb = M/size; 1373 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1374 1375 /* Set runtime options */ 1376 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1377 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1378 PetscOptionsEnd(); 1379 1380 /* Create object distribution template */ 1381 lu->templ = NULL; 1382 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1383 1384 /* Set the datatype */ 1385 #if defined(PETSC_USE_COMPLEX) 1386 lu->datatype = MPI_DOUBLE_COMPLEX; 1387 #else 1388 lu->datatype = MPI_DOUBLE; 1389 #endif 1390 1391 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1392 1393 1394 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1395 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1396 1397 if (ftype == MAT_FACTOR_LU) { 1398 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1399 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1400 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1401 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1402 (*F)->factortype = ftype; 1403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 EXTERN_C_END 1407 #endif 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1411 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1412 { 1413 #if defined(PETSC_HAVE_PLAPACK) 1414 PetscErrorCode ierr; 1415 #endif 1416 1417 PetscFunctionBegin; 1418 #if defined(PETSC_HAVE_PLAPACK) 1419 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1420 #else 1421 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1422 #endif 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatAXPY_MPIDense" 1428 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1429 { 1430 PetscErrorCode ierr; 1431 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1432 1433 PetscFunctionBegin; 1434 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatConjugate_MPIDense" 1440 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1441 { 1442 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1443 PetscErrorCode ierr; 1444 1445 PetscFunctionBegin; 1446 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatRealPart_MPIDense" 1452 PetscErrorCode MatRealPart_MPIDense(Mat A) 1453 { 1454 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1455 PetscErrorCode ierr; 1456 1457 PetscFunctionBegin; 1458 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } 1461 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1464 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1465 { 1466 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1467 PetscErrorCode ierr; 1468 1469 PetscFunctionBegin; 1470 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 /* -------------------------------------------------------------------*/ 1475 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1476 MatGetRow_MPIDense, 1477 MatRestoreRow_MPIDense, 1478 MatMult_MPIDense, 1479 /* 4*/ MatMultAdd_MPIDense, 1480 MatMultTranspose_MPIDense, 1481 MatMultTransposeAdd_MPIDense, 1482 0, 1483 0, 1484 0, 1485 /*10*/ 0, 1486 0, 1487 0, 1488 0, 1489 MatTranspose_MPIDense, 1490 /*15*/ MatGetInfo_MPIDense, 1491 MatEqual_MPIDense, 1492 MatGetDiagonal_MPIDense, 1493 MatDiagonalScale_MPIDense, 1494 MatNorm_MPIDense, 1495 /*20*/ MatAssemblyBegin_MPIDense, 1496 MatAssemblyEnd_MPIDense, 1497 MatSetOption_MPIDense, 1498 MatZeroEntries_MPIDense, 1499 /*24*/ MatZeroRows_MPIDense, 1500 0, 1501 0, 1502 0, 1503 0, 1504 /*29*/ MatSetUpPreallocation_MPIDense, 1505 0, 1506 0, 1507 MatGetArray_MPIDense, 1508 MatRestoreArray_MPIDense, 1509 /*34*/ MatDuplicate_MPIDense, 1510 0, 1511 0, 1512 0, 1513 0, 1514 /*39*/ MatAXPY_MPIDense, 1515 MatGetSubMatrices_MPIDense, 1516 0, 1517 MatGetValues_MPIDense, 1518 0, 1519 /*44*/ 0, 1520 MatScale_MPIDense, 1521 0, 1522 0, 1523 0, 1524 /*49*/ 0, 1525 0, 1526 0, 1527 0, 1528 0, 1529 /*54*/ 0, 1530 0, 1531 0, 1532 0, 1533 0, 1534 /*59*/ MatGetSubMatrix_MPIDense, 1535 MatDestroy_MPIDense, 1536 MatView_MPIDense, 1537 0, 1538 0, 1539 /*64*/ 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 /*69*/ 0, 1545 0, 1546 0, 1547 0, 1548 0, 1549 /*74*/ 0, 1550 0, 1551 0, 1552 0, 1553 0, 1554 /*79*/ 0, 1555 0, 1556 0, 1557 0, 1558 /*83*/ MatLoad_MPIDense, 1559 0, 1560 0, 1561 0, 1562 0, 1563 0, 1564 /*89*/ 1565 #if defined(PETSC_HAVE_PLAPACK) 1566 MatMatMult_MPIDense_MPIDense, 1567 MatMatMultSymbolic_MPIDense_MPIDense, 1568 MatMatMultNumeric_MPIDense_MPIDense, 1569 #else 1570 0, 1571 0, 1572 0, 1573 #endif 1574 0, 1575 0, 1576 /*94*/ 0, 1577 0, 1578 0, 1579 0, 1580 0, 1581 /*99*/ 0, 1582 0, 1583 0, 1584 MatConjugate_MPIDense, 1585 0, 1586 /*104*/0, 1587 MatRealPart_MPIDense, 1588 MatImaginaryPart_MPIDense, 1589 0 1590 }; 1591 1592 EXTERN_C_BEGIN 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1595 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1596 { 1597 Mat_MPIDense *a; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 mat->preallocated = PETSC_TRUE; 1602 /* Note: For now, when data is specified above, this assumes the user correctly 1603 allocates the local dense storage space. We should add error checking. */ 1604 1605 a = (Mat_MPIDense*)mat->data; 1606 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1607 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1608 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1609 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1610 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1611 PetscFunctionReturn(0); 1612 } 1613 EXTERN_C_END 1614 1615 /*MC 1616 MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1617 1618 run ./configure with the option --download-plapack 1619 1620 1621 Options Database Keys: 1622 . -mat_plapack_nprows <n> - number of rows in processor partition 1623 . -mat_plapack_npcols <n> - number of columns in processor partition 1624 . -mat_plapack_nb <n> - block size of template vector 1625 . -mat_plapack_nb_alg <n> - algorithmic block size 1626 - -mat_plapack_ckerror <n> - error checking flag 1627 1628 Level: intermediate 1629 1630 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1631 1632 M*/ 1633 1634 EXTERN_C_BEGIN 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatCreate_MPIDense" 1637 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1638 { 1639 Mat_MPIDense *a; 1640 PetscErrorCode ierr; 1641 1642 PetscFunctionBegin; 1643 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1644 mat->data = (void*)a; 1645 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1646 mat->mapping = 0; 1647 1648 mat->insertmode = NOT_SET_VALUES; 1649 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1650 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1651 1652 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1653 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1654 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1655 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1656 a->nvec = mat->cmap->n; 1657 1658 /* build cache for off array entries formed */ 1659 a->donotstash = PETSC_FALSE; 1660 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1661 1662 /* stuff used for matrix vector multiply */ 1663 a->lvec = 0; 1664 a->Mvctx = 0; 1665 a->roworiented = PETSC_TRUE; 1666 1667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1668 "MatGetDiagonalBlock_MPIDense", 1669 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1671 "MatMPIDenseSetPreallocation_MPIDense", 1672 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1674 "MatMatMult_MPIAIJ_MPIDense", 1675 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1677 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1678 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1680 "MatMatMultNumeric_MPIAIJ_MPIDense", 1681 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1683 "MatGetFactor_mpidense_petsc", 1684 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1685 #if defined(PETSC_HAVE_PLAPACK) 1686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1687 "MatGetFactor_mpidense_plapack", 1688 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1689 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1690 #endif 1691 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1692 1693 PetscFunctionReturn(0); 1694 } 1695 EXTERN_C_END 1696 1697 /*MC 1698 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1699 1700 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1701 and MATMPIDENSE otherwise. 1702 1703 Options Database Keys: 1704 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1705 1706 Level: beginner 1707 1708 1709 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1710 M*/ 1711 1712 EXTERN_C_BEGIN 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatCreate_Dense" 1715 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1716 { 1717 PetscErrorCode ierr; 1718 PetscMPIInt size; 1719 1720 PetscFunctionBegin; 1721 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1722 if (size == 1) { 1723 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1724 } else { 1725 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 EXTERN_C_END 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1733 /*@C 1734 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1735 1736 Not collective 1737 1738 Input Parameters: 1739 . A - the matrix 1740 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1741 to control all matrix memory allocation. 1742 1743 Notes: 1744 The dense format is fully compatible with standard Fortran 77 1745 storage by columns. 1746 1747 The data input variable is intended primarily for Fortran programmers 1748 who wish to allocate their own matrix memory space. Most users should 1749 set data=PETSC_NULL. 1750 1751 Level: intermediate 1752 1753 .keywords: matrix,dense, parallel 1754 1755 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1756 @*/ 1757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1758 { 1759 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1760 1761 PetscFunctionBegin; 1762 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1763 if (f) { 1764 ierr = (*f)(mat,data);CHKERRQ(ierr); 1765 } 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatCreateMPIDense" 1771 /*@C 1772 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1773 1774 Collective on MPI_Comm 1775 1776 Input Parameters: 1777 + comm - MPI communicator 1778 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1779 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1780 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1781 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1782 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1783 to control all matrix memory allocation. 1784 1785 Output Parameter: 1786 . A - the matrix 1787 1788 Notes: 1789 The dense format is fully compatible with standard Fortran 77 1790 storage by columns. 1791 1792 The data input variable is intended primarily for Fortran programmers 1793 who wish to allocate their own matrix memory space. Most users should 1794 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1795 1796 The user MUST specify either the local or global matrix dimensions 1797 (possibly both). 1798 1799 Level: intermediate 1800 1801 .keywords: matrix,dense, parallel 1802 1803 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1804 @*/ 1805 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1806 { 1807 PetscErrorCode ierr; 1808 PetscMPIInt size; 1809 1810 PetscFunctionBegin; 1811 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1812 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1813 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1814 if (size > 1) { 1815 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1816 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1817 } else { 1818 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1819 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1820 } 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatDuplicate_MPIDense" 1826 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1827 { 1828 Mat mat; 1829 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 *newmat = 0; 1834 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1835 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1836 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1837 a = (Mat_MPIDense*)mat->data; 1838 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1839 mat->factortype = A->factortype; 1840 mat->assembled = PETSC_TRUE; 1841 mat->preallocated = PETSC_TRUE; 1842 1843 mat->rmap->rstart = A->rmap->rstart; 1844 mat->rmap->rend = A->rmap->rend; 1845 a->size = oldmat->size; 1846 a->rank = oldmat->rank; 1847 mat->insertmode = NOT_SET_VALUES; 1848 a->nvec = oldmat->nvec; 1849 a->donotstash = oldmat->donotstash; 1850 1851 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1852 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1853 1854 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1855 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1856 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1857 1858 *newmat = mat; 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1864 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1865 { 1866 PetscErrorCode ierr; 1867 PetscMPIInt rank,size; 1868 PetscInt *rowners,i,m,nz,j; 1869 PetscScalar *array,*vals,*vals_ptr; 1870 MPI_Status status; 1871 1872 PetscFunctionBegin; 1873 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1874 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1875 1876 /* determine ownership of all rows */ 1877 m = M/size + ((M % size) > rank); 1878 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1879 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1880 rowners[0] = 0; 1881 for (i=2; i<=size; i++) { 1882 rowners[i] += rowners[i-1]; 1883 } 1884 1885 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1886 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1887 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1888 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1889 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1890 1891 if (!rank) { 1892 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1893 1894 /* read in my part of the matrix numerical values */ 1895 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1896 1897 /* insert into matrix-by row (this is why cannot directly read into array */ 1898 vals_ptr = vals; 1899 for (i=0; i<m; i++) { 1900 for (j=0; j<N; j++) { 1901 array[i + j*m] = *vals_ptr++; 1902 } 1903 } 1904 1905 /* read in other processors and ship out */ 1906 for (i=1; i<size; i++) { 1907 nz = (rowners[i+1] - rowners[i])*N; 1908 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1909 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1910 } 1911 } else { 1912 /* receive numeric values */ 1913 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1914 1915 /* receive message of values*/ 1916 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1917 1918 /* insert into matrix-by row (this is why cannot directly read into array */ 1919 vals_ptr = vals; 1920 for (i=0; i<m; i++) { 1921 for (j=0; j<N; j++) { 1922 array[i + j*m] = *vals_ptr++; 1923 } 1924 } 1925 } 1926 ierr = PetscFree(rowners);CHKERRQ(ierr); 1927 ierr = PetscFree(vals);CHKERRQ(ierr); 1928 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1929 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatLoad_MPIDense" 1935 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1936 { 1937 Mat A; 1938 PetscScalar *vals,*svals; 1939 MPI_Comm comm = ((PetscObject)viewer)->comm; 1940 MPI_Status status; 1941 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1942 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1943 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1944 PetscInt i,nz,j,rstart,rend; 1945 int fd; 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1950 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1951 if (!rank) { 1952 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1953 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1954 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1955 } 1956 1957 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1958 M = header[1]; N = header[2]; nz = header[3]; 1959 1960 /* 1961 Handle case where matrix is stored on disk as a dense matrix 1962 */ 1963 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1964 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 /* determine ownership of all rows */ 1969 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1970 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1971 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1972 rowners[0] = 0; 1973 for (i=2; i<=size; i++) { 1974 rowners[i] += rowners[i-1]; 1975 } 1976 rstart = rowners[rank]; 1977 rend = rowners[rank+1]; 1978 1979 /* distribute row lengths to all processors */ 1980 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1981 if (!rank) { 1982 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1983 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1984 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1985 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1986 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1987 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1988 } else { 1989 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1990 } 1991 1992 if (!rank) { 1993 /* calculate the number of nonzeros on each processor */ 1994 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1995 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1996 for (i=0; i<size; i++) { 1997 for (j=rowners[i]; j< rowners[i+1]; j++) { 1998 procsnz[i] += rowlengths[j]; 1999 } 2000 } 2001 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2002 2003 /* determine max buffer needed and allocate it */ 2004 maxnz = 0; 2005 for (i=0; i<size; i++) { 2006 maxnz = PetscMax(maxnz,procsnz[i]); 2007 } 2008 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2009 2010 /* read in my part of the matrix column indices */ 2011 nz = procsnz[0]; 2012 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2013 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2014 2015 /* read in every one elses and ship off */ 2016 for (i=1; i<size; i++) { 2017 nz = procsnz[i]; 2018 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2019 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2020 } 2021 ierr = PetscFree(cols);CHKERRQ(ierr); 2022 } else { 2023 /* determine buffer space needed for message */ 2024 nz = 0; 2025 for (i=0; i<m; i++) { 2026 nz += ourlens[i]; 2027 } 2028 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2029 2030 /* receive message of column indices*/ 2031 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2032 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2033 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2034 } 2035 2036 /* loop over local rows, determining number of off diagonal entries */ 2037 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2038 jj = 0; 2039 for (i=0; i<m; i++) { 2040 for (j=0; j<ourlens[i]; j++) { 2041 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2042 jj++; 2043 } 2044 } 2045 2046 /* create our matrix */ 2047 for (i=0; i<m; i++) { 2048 ourlens[i] -= offlens[i]; 2049 } 2050 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2051 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2052 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2053 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2054 A = *newmat; 2055 for (i=0; i<m; i++) { 2056 ourlens[i] += offlens[i]; 2057 } 2058 2059 if (!rank) { 2060 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2061 2062 /* read in my part of the matrix numerical values */ 2063 nz = procsnz[0]; 2064 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2065 2066 /* insert into matrix */ 2067 jj = rstart; 2068 smycols = mycols; 2069 svals = vals; 2070 for (i=0; i<m; i++) { 2071 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2072 smycols += ourlens[i]; 2073 svals += ourlens[i]; 2074 jj++; 2075 } 2076 2077 /* read in other processors and ship out */ 2078 for (i=1; i<size; i++) { 2079 nz = procsnz[i]; 2080 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2081 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2082 } 2083 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2084 } else { 2085 /* receive numeric values */ 2086 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2087 2088 /* receive message of values*/ 2089 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2090 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2091 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2092 2093 /* insert into matrix */ 2094 jj = rstart; 2095 smycols = mycols; 2096 svals = vals; 2097 for (i=0; i<m; i++) { 2098 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2099 smycols += ourlens[i]; 2100 svals += ourlens[i]; 2101 jj++; 2102 } 2103 } 2104 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2105 ierr = PetscFree(vals);CHKERRQ(ierr); 2106 ierr = PetscFree(mycols);CHKERRQ(ierr); 2107 ierr = PetscFree(rowners);CHKERRQ(ierr); 2108 2109 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2110 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatEqual_MPIDense" 2116 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2117 { 2118 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2119 Mat a,b; 2120 PetscTruth flg; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 a = matA->A; 2125 b = matB->A; 2126 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2127 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #if defined(PETSC_HAVE_PLAPACK) 2132 2133 #undef __FUNCT__ 2134 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2135 /*@C 2136 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2137 Level: developer 2138 2139 .keywords: Petsc, destroy, package, PLAPACK 2140 .seealso: PetscFinalize() 2141 @*/ 2142 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2143 { 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = PLA_Finalize();CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2153 /*@C 2154 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2155 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2156 2157 Input Parameter: 2158 . comm - the communicator the matrix lives on 2159 2160 Level: developer 2161 2162 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2163 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2164 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2165 cannot overlap. 2166 2167 .keywords: Petsc, initialize, package, PLAPACK 2168 .seealso: PetscInitializePackage(), PetscInitialize() 2169 @*/ 2170 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2171 { 2172 PetscMPIInt size; 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 if (!PLA_Initialized(PETSC_NULL)) { 2177 2178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2179 Plapack_nprows = 1; 2180 Plapack_npcols = size; 2181 2182 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2183 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2184 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2185 #if defined(PETSC_USE_DEBUG) 2186 Plapack_ierror = 3; 2187 #else 2188 Plapack_ierror = 0; 2189 #endif 2190 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2191 if (Plapack_ierror){ 2192 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2193 } else { 2194 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2195 } 2196 2197 Plapack_nb_alg = 0; 2198 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2199 if (Plapack_nb_alg) { 2200 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2201 } 2202 PetscOptionsEnd(); 2203 2204 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2205 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2206 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2207 } 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #endif 2212