1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscTruth pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscTruth flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 211 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 212 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 213 214 /* No parallel redistribution currently supported! Should really check each index set 215 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 216 original matrix! */ 217 218 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 219 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 220 221 /* Check submatrix call */ 222 if (scall == MAT_REUSE_MATRIX) { 223 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 224 /* Really need to test rows and column sizes! */ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = addv; /* in case this processor had no cache */ 279 280 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 281 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 282 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 289 { 290 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 291 PetscErrorCode ierr; 292 PetscInt i,*row,*col,flg,j,rstart,ncols; 293 PetscMPIInt n; 294 PetscScalar *val; 295 InsertMode addv=mat->insertmode; 296 297 PetscFunctionBegin; 298 /* wait on receives */ 299 while (1) { 300 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscTruth found; 358 359 PetscFunctionBegin; 360 /* first count number of contributors to each processor */ 361 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 362 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 363 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 364 for (i=0; i<N; i++) { 365 idx = rows[i]; 366 found = PETSC_FALSE; 367 for (j=0; j<size; j++) { 368 if (idx >= owners[j] && idx < owners[j+1]) { 369 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 370 } 371 } 372 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 373 } 374 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 375 376 /* inform other processors of number of messages and max length*/ 377 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 378 379 /* post receives: */ 380 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 381 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 382 for (i=0; i<nrecvs; i++) { 383 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 391 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 392 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 393 starts[0] = 0; 394 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 395 for (i=0; i<N; i++) { 396 svalues[starts[owner[i]]++] = rows[i]; 397 } 398 399 starts[0] = 0; 400 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 401 count = 0; 402 for (i=0; i<size; i++) { 403 if (nprocs[2*i+1]) { 404 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscFree(starts);CHKERRQ(ierr); 408 409 base = owners[rank]; 410 411 /* wait on receives */ 412 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 413 count = nrecvs; 414 slen = 0; 415 while (count) { 416 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 417 /* unpack receives into our local space */ 418 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 419 source[imdex] = recv_status.MPI_SOURCE; 420 lens[imdex] = n; 421 slen += n; 422 count--; 423 } 424 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 425 426 /* move the data into the send scatter */ 427 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 428 count = 0; 429 for (i=0; i<nrecvs; i++) { 430 values = rvalues + i*nmax; 431 for (j=0; j<lens[i]; j++) { 432 lrows[count++] = values[j] - base; 433 } 434 } 435 ierr = PetscFree(rvalues);CHKERRQ(ierr); 436 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 437 ierr = PetscFree(owner);CHKERRQ(ierr); 438 ierr = PetscFree(nprocs);CHKERRQ(ierr); 439 440 /* actually zap the local rows */ 441 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 442 ierr = PetscFree(lrows);CHKERRQ(ierr); 443 444 /* wait on sends */ 445 if (nsends) { 446 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 447 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 448 ierr = PetscFree(send_status);CHKERRQ(ierr); 449 } 450 ierr = PetscFree(send_waits);CHKERRQ(ierr); 451 ierr = PetscFree(svalues);CHKERRQ(ierr); 452 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_MPIDense" 458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMultAdd_MPIDense" 472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultTranspose_MPIDense" 486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 487 { 488 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 489 PetscErrorCode ierr; 490 PetscScalar zero = 0.0; 491 492 PetscFunctionBegin; 493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 494 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 509 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 510 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNCT__ 516 #define __FUNCT__ "MatGetDiagonal_MPIDense" 517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 521 PetscErrorCode ierr; 522 PetscInt len,i,n,m = A->rmap->n,radd; 523 PetscScalar *x,zero = 0.0; 524 525 PetscFunctionBegin; 526 ierr = VecSet(v,zero);CHKERRQ(ierr); 527 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 528 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 529 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 530 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 531 radd = A->rmap->rstart*m; 532 for (i=0; i<len; i++) { 533 x[i] = aloc->v[radd + i*m + i]; 534 } 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatDestroy_MPIDense" 541 PetscErrorCode MatDestroy_MPIDense(Mat mat) 542 { 543 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 544 PetscErrorCode ierr; 545 #if defined(PETSC_HAVE_PLAPACK) 546 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 547 #endif 548 549 PetscFunctionBegin; 550 551 #if defined(PETSC_USE_LOG) 552 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 553 #endif 554 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 556 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 557 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 558 #if defined(PETSC_HAVE_PLAPACK) 559 if (lu) { 560 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 561 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 562 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 563 564 if (lu->is_pla) { 565 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 566 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 567 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 568 } 569 } 570 #endif 571 572 ierr = PetscFree(mdn);CHKERRQ(ierr); 573 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 MPI_Status status; 595 596 PetscFunctionBegin; 597 if (mdn->size == 1) { 598 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 601 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 602 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 603 604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 605 if (format == PETSC_VIEWER_NATIVE) { 606 607 if (!rank) { 608 /* store the matrix as a dense matrix */ 609 header[0] = MAT_FILE_CLASSID; 610 header[1] = mat->rmap->N; 611 header[2] = N; 612 header[3] = MATRIX_BINARY_FORMAT_DENSE; 613 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 614 615 /* get largest work array needed for transposing array */ 616 mmax = mat->rmap->n; 617 for (i=1; i<size; i++) { 618 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 619 } 620 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 621 622 /* write out local array, by rows */ 623 m = mat->rmap->n; 624 v = a->v; 625 for (j=0; j<N; j++) { 626 for (i=0; i<m; i++) { 627 work[j + i*N] = *v++; 628 } 629 } 630 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 631 /* get largest work array to receive messages from other processes, excludes process zero */ 632 mmax = 0; 633 for (i=1; i<size; i++) { 634 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 635 } 636 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 637 for(k = 1; k < size; k++) { 638 v = vv; 639 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 640 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 641 642 for(j = 0; j < N; j++) { 643 for(i = 0; i < m; i++) { 644 work[j + i*N] = *v++; 645 } 646 } 647 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 648 } 649 ierr = PetscFree(work);CHKERRQ(ierr); 650 ierr = PetscFree(vv);CHKERRQ(ierr); 651 } else { 652 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 } 654 } else { 655 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 666 PetscErrorCode ierr; 667 PetscMPIInt size = mdn->size,rank = mdn->rank; 668 const PetscViewerType vtype; 669 PetscTruth iascii,isdraw; 670 PetscViewer sviewer; 671 PetscViewerFormat format; 672 #if defined(PETSC_HAVE_PLAPACK) 673 Mat_Plapack *lu; 674 #endif 675 676 PetscFunctionBegin; 677 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 678 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 679 if (iascii) { 680 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 681 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 682 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 683 MatInfo info; 684 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 686 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 688 #if defined(PETSC_HAVE_PLAPACK) 689 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 691 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 692 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 693 if (mat->factortype){ 694 lu=(Mat_Plapack*)(mat->spptr); 695 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 696 } 697 #else 698 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 699 #endif 700 PetscFunctionReturn(0); 701 } else if (format == PETSC_VIEWER_ASCII_INFO) { 702 PetscFunctionReturn(0); 703 } 704 } else if (isdraw) { 705 PetscDraw draw; 706 PetscTruth isnull; 707 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 710 if (isnull) PetscFunctionReturn(0); 711 } 712 713 if (size == 1) { 714 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 719 PetscInt *cols; 720 PetscScalar *vals; 721 722 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 723 if (!rank) { 724 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 725 } else { 726 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 727 } 728 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 729 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 730 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 731 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 732 733 /* Copy the matrix ... This isn't the most efficient means, 734 but it's quick for now */ 735 A->insertmode = INSERT_VALUES; 736 row = mat->rmap->rstart; m = mdn->A->rmap->n; 737 for (i=0; i<m; i++) { 738 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 739 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 740 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 741 row++; 742 } 743 744 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 747 if (!rank) { 748 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 749 } 750 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 751 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 752 ierr = MatDestroy(A);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatView_MPIDense" 759 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 760 { 761 PetscErrorCode ierr; 762 PetscTruth iascii,isbinary,isdraw,issocket; 763 764 PetscFunctionBegin; 765 766 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 767 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 768 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 769 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 770 771 if (iascii || issocket || isdraw) { 772 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 773 } else if (isbinary) { 774 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 775 } else { 776 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 777 } 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatGetInfo_MPIDense" 783 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 784 { 785 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 786 Mat mdn = mat->A; 787 PetscErrorCode ierr; 788 PetscReal isend[5],irecv[5]; 789 790 PetscFunctionBegin; 791 info->block_size = 1.0; 792 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 793 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 794 isend[3] = info->memory; isend[4] = info->mallocs; 795 if (flag == MAT_LOCAL) { 796 info->nz_used = isend[0]; 797 info->nz_allocated = isend[1]; 798 info->nz_unneeded = isend[2]; 799 info->memory = isend[3]; 800 info->mallocs = isend[4]; 801 } else if (flag == MAT_GLOBAL_MAX) { 802 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 803 info->nz_used = irecv[0]; 804 info->nz_allocated = irecv[1]; 805 info->nz_unneeded = irecv[2]; 806 info->memory = irecv[3]; 807 info->mallocs = irecv[4]; 808 } else if (flag == MAT_GLOBAL_SUM) { 809 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 810 info->nz_used = irecv[0]; 811 info->nz_allocated = irecv[1]; 812 info->nz_unneeded = irecv[2]; 813 info->memory = irecv[3]; 814 info->mallocs = irecv[4]; 815 } 816 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 817 info->fill_ratio_needed = 0; 818 info->factor_mallocs = 0; 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatSetOption_MPIDense" 824 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 825 { 826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 switch (op) { 831 case MAT_NEW_NONZERO_LOCATIONS: 832 case MAT_NEW_NONZERO_LOCATION_ERR: 833 case MAT_NEW_NONZERO_ALLOCATION_ERR: 834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 835 break; 836 case MAT_ROW_ORIENTED: 837 a->roworiented = flg; 838 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 839 break; 840 case MAT_NEW_DIAGONALS: 841 case MAT_USE_HASH_TABLE: 842 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 843 break; 844 case MAT_IGNORE_OFF_PROC_ENTRIES: 845 a->donotstash = flg; 846 break; 847 case MAT_SYMMETRIC: 848 case MAT_STRUCTURALLY_SYMMETRIC: 849 case MAT_HERMITIAN: 850 case MAT_SYMMETRY_ETERNAL: 851 case MAT_IGNORE_LOWER_TRIANGULAR: 852 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 853 break; 854 default: 855 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatDiagonalScale_MPIDense" 863 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 864 { 865 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 866 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 867 PetscScalar *l,*r,x,*v; 868 PetscErrorCode ierr; 869 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 870 871 PetscFunctionBegin; 872 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 873 if (ll) { 874 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 875 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 876 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 877 for (i=0; i<m; i++) { 878 x = l[i]; 879 v = mat->v + i; 880 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 881 } 882 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 883 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 884 } 885 if (rr) { 886 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 887 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 888 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 891 for (i=0; i<n; i++) { 892 x = r[i]; 893 v = mat->v + i*m; 894 for (j=0; j<m; j++) { (*v++) *= x;} 895 } 896 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatNorm_MPIDense" 904 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 905 { 906 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 907 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 908 PetscErrorCode ierr; 909 PetscInt i,j; 910 PetscReal sum = 0.0; 911 PetscScalar *v = mat->v; 912 913 PetscFunctionBegin; 914 if (mdn->size == 1) { 915 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 916 } else { 917 if (type == NORM_FROBENIUS) { 918 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 919 #if defined(PETSC_USE_COMPLEX) 920 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 921 #else 922 sum += (*v)*(*v); v++; 923 #endif 924 } 925 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 926 *nrm = sqrt(*nrm); 927 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 928 } else if (type == NORM_1) { 929 PetscReal *tmp,*tmp2; 930 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 931 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 932 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 933 *nrm = 0.0; 934 v = mat->v; 935 for (j=0; j<mdn->A->cmap->n; j++) { 936 for (i=0; i<mdn->A->rmap->n; i++) { 937 tmp[j] += PetscAbsScalar(*v); v++; 938 } 939 } 940 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 941 for (j=0; j<A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 945 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 949 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 950 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatTranspose_MPIDense" 957 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 958 { 959 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 960 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 961 Mat B; 962 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 963 PetscErrorCode ierr; 964 PetscInt j,i; 965 PetscScalar *v; 966 967 PetscFunctionBegin; 968 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 969 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 970 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 972 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 973 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 974 } else { 975 B = *matout; 976 } 977 978 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 979 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 980 for (i=0; i<m; i++) rwork[i] = rstart + i; 981 for (j=0; j<n; j++) { 982 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 983 v += m; 984 } 985 ierr = PetscFree(rwork);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 989 *matout = B; 990 } else { 991 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1002 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1003 { 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #if defined(PETSC_HAVE_PLAPACK) 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1015 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1016 { 1017 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1018 PetscErrorCode ierr; 1019 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1020 PetscScalar *array; 1021 PetscReal one = 1.0; 1022 1023 PetscFunctionBegin; 1024 /* Copy A into F->lu->A */ 1025 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1026 ierr = PLA_API_begin();CHKERRQ(ierr); 1027 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1028 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1029 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1030 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1031 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1032 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1033 ierr = PLA_API_end();CHKERRQ(ierr); 1034 lu->rstart = rstart; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1040 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1041 { 1042 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1043 PetscErrorCode ierr; 1044 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1045 PetscScalar *array; 1046 PetscReal one = 1.0; 1047 1048 PetscFunctionBegin; 1049 /* Copy F into A->lu->A */ 1050 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1051 ierr = PLA_API_begin();CHKERRQ(ierr); 1052 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1053 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1054 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1055 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1056 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1057 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1058 ierr = PLA_API_end();CHKERRQ(ierr); 1059 lu->rstart = rstart; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1065 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1066 { 1067 PetscErrorCode ierr; 1068 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1069 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1070 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1071 PLA_Obj alpha = NULL,beta = NULL; 1072 1073 PetscFunctionBegin; 1074 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1075 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1076 1077 /* 1078 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1079 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1080 */ 1081 1082 /* do the multiply in PLA */ 1083 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1084 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1085 CHKMEMQ; 1086 1087 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1088 CHKMEMQ; 1089 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1090 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1091 1092 /* 1093 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1094 */ 1095 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1101 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1102 { 1103 PetscErrorCode ierr; 1104 PetscInt m=A->rmap->n,n=B->cmap->n; 1105 Mat Cmat; 1106 1107 PetscFunctionBegin; 1108 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1109 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1110 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1111 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1112 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1113 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1114 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 1116 *C = Cmat; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1122 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1123 { 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 if (scall == MAT_INITIAL_MATRIX){ 1128 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1129 } 1130 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatSolve_MPIDense" 1136 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1137 { 1138 MPI_Comm comm = ((PetscObject)A)->comm; 1139 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1140 PetscErrorCode ierr; 1141 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1142 PetscScalar *array; 1143 PetscReal one = 1.0; 1144 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1145 PLA_Obj v_pla = NULL; 1146 PetscScalar *loc_buf; 1147 Vec loc_x; 1148 1149 PetscFunctionBegin; 1150 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1151 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1152 1153 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1154 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1155 PLA_Obj_set_to_zero(v_pla); 1156 1157 /* Copy b into rhs_pla */ 1158 PLA_API_begin(); 1159 PLA_Obj_API_open(v_pla); 1160 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1161 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1162 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1163 PLA_Obj_API_close(v_pla); 1164 PLA_API_end(); 1165 1166 if (A->factortype == MAT_FACTOR_LU){ 1167 /* Apply the permutations to the right hand sides */ 1168 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1169 1170 /* Solve L y = b, overwriting b with y */ 1171 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1172 1173 /* Solve U x = y (=b), overwriting b with x */ 1174 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1175 } else { /* MAT_FACTOR_CHOLESKY */ 1176 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1177 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1178 PLA_NONUNIT_DIAG,lu->A,v_pla); 1179 } 1180 1181 /* Copy PLAPACK x into Petsc vector x */ 1182 PLA_Obj_local_length(v_pla, &loc_m); 1183 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1184 PLA_Obj_local_stride(v_pla, &loc_stride); 1185 /* 1186 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1187 */ 1188 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1189 if (!lu->pla_solved){ 1190 1191 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1192 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1193 1194 /* Create IS and cts for VecScatterring */ 1195 PLA_Obj_local_length(v_pla, &loc_m); 1196 PLA_Obj_local_stride(v_pla, &loc_stride); 1197 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1198 1199 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1200 for (i=0; i<loc_m; i+=lu->nb){ 1201 j = 0; 1202 while (j < lu->nb && i+j < loc_m){ 1203 idx_petsc[i+j] = rstart + j; j++; 1204 } 1205 rstart += size*lu->nb; 1206 } 1207 1208 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1209 1210 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1212 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1213 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1214 } 1215 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 1218 /* Free data */ 1219 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1220 PLA_Obj_free(&v_pla); 1221 1222 lu->pla_solved = PETSC_TRUE; 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1228 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1229 { 1230 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1231 PetscErrorCode ierr; 1232 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1233 PetscInt info_pla=0; 1234 PetscScalar *array,one = 1.0; 1235 1236 PetscFunctionBegin; 1237 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1238 PLA_Obj_free(&lu->A); 1239 PLA_Obj_free (&lu->pivots); 1240 } 1241 /* Create PLAPACK matrix object */ 1242 lu->A = NULL; lu->pivots = NULL; 1243 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1244 PLA_Obj_set_to_zero(lu->A); 1245 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1246 1247 /* Copy A into lu->A */ 1248 PLA_API_begin(); 1249 PLA_Obj_API_open(lu->A); 1250 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1251 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1252 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1253 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1254 PLA_Obj_API_close(lu->A); 1255 PLA_API_end(); 1256 1257 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1258 info_pla = PLA_LU(lu->A,lu->pivots); 1259 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1260 1261 lu->rstart = rstart; 1262 lu->mstruct = SAME_NONZERO_PATTERN; 1263 F->ops->solve = MatSolve_MPIDense; 1264 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1270 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1271 { 1272 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1273 PetscErrorCode ierr; 1274 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1275 PetscInt info_pla=0; 1276 PetscScalar *array,one = 1.0; 1277 1278 PetscFunctionBegin; 1279 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1280 PLA_Obj_free(&lu->A); 1281 } 1282 /* Create PLAPACK matrix object */ 1283 lu->A = NULL; 1284 lu->pivots = NULL; 1285 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1286 1287 /* Copy A into lu->A */ 1288 PLA_API_begin(); 1289 PLA_Obj_API_open(lu->A); 1290 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1291 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1292 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1293 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1294 PLA_Obj_API_close(lu->A); 1295 PLA_API_end(); 1296 1297 /* Factor P A -> Chol */ 1298 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1299 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1300 1301 lu->rstart = rstart; 1302 lu->mstruct = SAME_NONZERO_PATTERN; 1303 F->ops->solve = MatSolve_MPIDense; 1304 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* Note the Petsc perm permutation is ignored */ 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1311 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1312 { 1313 PetscErrorCode ierr; 1314 PetscTruth issymmetric,set; 1315 1316 PetscFunctionBegin; 1317 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1318 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1319 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* Note the Petsc r and c permutations are ignored */ 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1326 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1327 { 1328 PetscErrorCode ierr; 1329 PetscInt M = A->rmap->N; 1330 Mat_Plapack *lu; 1331 1332 PetscFunctionBegin; 1333 lu = (Mat_Plapack*)F->spptr; 1334 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1335 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 EXTERN_C_BEGIN 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1342 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1343 { 1344 PetscFunctionBegin; 1345 *type = MATSOLVERPLAPACK; 1346 PetscFunctionReturn(0); 1347 } 1348 EXTERN_C_END 1349 1350 EXTERN_C_BEGIN 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1353 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1354 { 1355 PetscErrorCode ierr; 1356 Mat_Plapack *lu; 1357 PetscMPIInt size; 1358 PetscInt M=A->rmap->N; 1359 1360 PetscFunctionBegin; 1361 /* Create the factorization matrix */ 1362 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1363 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1364 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1365 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1366 (*F)->spptr = (void*)lu; 1367 1368 /* Set default Plapack parameters */ 1369 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1370 lu->nb = M/size; 1371 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1372 1373 /* Set runtime options */ 1374 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1375 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1376 PetscOptionsEnd(); 1377 1378 /* Create object distribution template */ 1379 lu->templ = NULL; 1380 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1381 1382 /* Set the datatype */ 1383 #if defined(PETSC_USE_COMPLEX) 1384 lu->datatype = MPI_DOUBLE_COMPLEX; 1385 #else 1386 lu->datatype = MPI_DOUBLE; 1387 #endif 1388 1389 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1390 1391 1392 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1393 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1394 1395 if (ftype == MAT_FACTOR_LU) { 1396 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1397 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1398 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1399 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1400 (*F)->factortype = ftype; 1401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 EXTERN_C_END 1405 #endif 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1409 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1410 { 1411 #if defined(PETSC_HAVE_PLAPACK) 1412 PetscErrorCode ierr; 1413 #endif 1414 1415 PetscFunctionBegin; 1416 #if defined(PETSC_HAVE_PLAPACK) 1417 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1418 #else 1419 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1420 #endif 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatAXPY_MPIDense" 1426 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1427 { 1428 PetscErrorCode ierr; 1429 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1430 1431 PetscFunctionBegin; 1432 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatConjugate_MPIDense" 1438 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1439 { 1440 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1441 PetscErrorCode ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatRealPart_MPIDense" 1450 PetscErrorCode MatRealPart_MPIDense(Mat A) 1451 { 1452 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1453 PetscErrorCode ierr; 1454 1455 PetscFunctionBegin; 1456 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1462 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1463 { 1464 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 /* -------------------------------------------------------------------*/ 1473 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1474 MatGetRow_MPIDense, 1475 MatRestoreRow_MPIDense, 1476 MatMult_MPIDense, 1477 /* 4*/ MatMultAdd_MPIDense, 1478 MatMultTranspose_MPIDense, 1479 MatMultTransposeAdd_MPIDense, 1480 0, 1481 0, 1482 0, 1483 /*10*/ 0, 1484 0, 1485 0, 1486 0, 1487 MatTranspose_MPIDense, 1488 /*15*/ MatGetInfo_MPIDense, 1489 MatEqual_MPIDense, 1490 MatGetDiagonal_MPIDense, 1491 MatDiagonalScale_MPIDense, 1492 MatNorm_MPIDense, 1493 /*20*/ MatAssemblyBegin_MPIDense, 1494 MatAssemblyEnd_MPIDense, 1495 MatSetOption_MPIDense, 1496 MatZeroEntries_MPIDense, 1497 /*24*/ MatZeroRows_MPIDense, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /*29*/ MatSetUpPreallocation_MPIDense, 1503 0, 1504 0, 1505 MatGetArray_MPIDense, 1506 MatRestoreArray_MPIDense, 1507 /*34*/ MatDuplicate_MPIDense, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /*39*/ MatAXPY_MPIDense, 1513 MatGetSubMatrices_MPIDense, 1514 0, 1515 MatGetValues_MPIDense, 1516 0, 1517 /*44*/ 0, 1518 MatScale_MPIDense, 1519 0, 1520 0, 1521 0, 1522 /*49*/ 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*54*/ 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 /*59*/ MatGetSubMatrix_MPIDense, 1533 MatDestroy_MPIDense, 1534 MatView_MPIDense, 1535 0, 1536 0, 1537 /*64*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*69*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*74*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*79*/ 0, 1553 0, 1554 0, 1555 0, 1556 /*83*/ MatLoad_MPIDense, 1557 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*89*/ 1563 #if defined(PETSC_HAVE_PLAPACK) 1564 MatMatMult_MPIDense_MPIDense, 1565 MatMatMultSymbolic_MPIDense_MPIDense, 1566 MatMatMultNumeric_MPIDense_MPIDense, 1567 #else 1568 0, 1569 0, 1570 0, 1571 #endif 1572 0, 1573 0, 1574 /*94*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*99*/ 0, 1580 0, 1581 0, 1582 MatConjugate_MPIDense, 1583 0, 1584 /*104*/0, 1585 MatRealPart_MPIDense, 1586 MatImaginaryPart_MPIDense, 1587 0, 1588 0, 1589 /*109*/0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /*114*/0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 /*119*/0, 1600 0, 1601 0, 1602 0 1603 }; 1604 1605 EXTERN_C_BEGIN 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1608 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1609 { 1610 Mat_MPIDense *a; 1611 PetscErrorCode ierr; 1612 1613 PetscFunctionBegin; 1614 mat->preallocated = PETSC_TRUE; 1615 /* Note: For now, when data is specified above, this assumes the user correctly 1616 allocates the local dense storage space. We should add error checking. */ 1617 1618 a = (Mat_MPIDense*)mat->data; 1619 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1620 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1621 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1622 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1623 a->nvec = mat->cmap->n; 1624 1625 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1626 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1627 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1628 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1629 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1630 PetscFunctionReturn(0); 1631 } 1632 EXTERN_C_END 1633 1634 /*MC 1635 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1636 1637 run ./configure with the option --download-plapack 1638 1639 1640 Options Database Keys: 1641 . -mat_plapack_nprows <n> - number of rows in processor partition 1642 . -mat_plapack_npcols <n> - number of columns in processor partition 1643 . -mat_plapack_nb <n> - block size of template vector 1644 . -mat_plapack_nb_alg <n> - algorithmic block size 1645 - -mat_plapack_ckerror <n> - error checking flag 1646 1647 Level: intermediate 1648 1649 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1650 1651 M*/ 1652 1653 EXTERN_C_BEGIN 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatCreate_MPIDense" 1656 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1657 { 1658 Mat_MPIDense *a; 1659 PetscErrorCode ierr; 1660 1661 PetscFunctionBegin; 1662 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1663 mat->data = (void*)a; 1664 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1665 mat->mapping = 0; 1666 1667 mat->insertmode = NOT_SET_VALUES; 1668 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1669 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1670 1671 /* build cache for off array entries formed */ 1672 a->donotstash = PETSC_FALSE; 1673 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1674 1675 /* stuff used for matrix vector multiply */ 1676 a->lvec = 0; 1677 a->Mvctx = 0; 1678 a->roworiented = PETSC_TRUE; 1679 1680 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1681 "MatGetDiagonalBlock_MPIDense", 1682 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1684 "MatMPIDenseSetPreallocation_MPIDense", 1685 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1687 "MatMatMult_MPIAIJ_MPIDense", 1688 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1689 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1690 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1691 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1692 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1693 "MatMatMultNumeric_MPIAIJ_MPIDense", 1694 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1696 "MatGetFactor_mpidense_petsc", 1697 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1698 #if defined(PETSC_HAVE_PLAPACK) 1699 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1700 "MatGetFactor_mpidense_plapack", 1701 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1702 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1703 #endif 1704 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1705 1706 PetscFunctionReturn(0); 1707 } 1708 EXTERN_C_END 1709 1710 /*MC 1711 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1712 1713 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1714 and MATMPIDENSE otherwise. 1715 1716 Options Database Keys: 1717 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1718 1719 Level: beginner 1720 1721 1722 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1723 M*/ 1724 1725 EXTERN_C_BEGIN 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatCreate_Dense" 1728 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1729 { 1730 PetscErrorCode ierr; 1731 PetscMPIInt size; 1732 1733 PetscFunctionBegin; 1734 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1735 if (size == 1) { 1736 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1737 } else { 1738 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1739 } 1740 PetscFunctionReturn(0); 1741 } 1742 EXTERN_C_END 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1746 /*@C 1747 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1748 1749 Not collective 1750 1751 Input Parameters: 1752 . A - the matrix 1753 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1754 to control all matrix memory allocation. 1755 1756 Notes: 1757 The dense format is fully compatible with standard Fortran 77 1758 storage by columns. 1759 1760 The data input variable is intended primarily for Fortran programmers 1761 who wish to allocate their own matrix memory space. Most users should 1762 set data=PETSC_NULL. 1763 1764 Level: intermediate 1765 1766 .keywords: matrix,dense, parallel 1767 1768 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1769 @*/ 1770 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1771 { 1772 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1773 1774 PetscFunctionBegin; 1775 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1776 if (f) { 1777 ierr = (*f)(mat,data);CHKERRQ(ierr); 1778 } 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatCreateMPIDense" 1784 /*@C 1785 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1786 1787 Collective on MPI_Comm 1788 1789 Input Parameters: 1790 + comm - MPI communicator 1791 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1792 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1793 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1794 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1795 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1796 to control all matrix memory allocation. 1797 1798 Output Parameter: 1799 . A - the matrix 1800 1801 Notes: 1802 The dense format is fully compatible with standard Fortran 77 1803 storage by columns. 1804 1805 The data input variable is intended primarily for Fortran programmers 1806 who wish to allocate their own matrix memory space. Most users should 1807 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1808 1809 The user MUST specify either the local or global matrix dimensions 1810 (possibly both). 1811 1812 Level: intermediate 1813 1814 .keywords: matrix,dense, parallel 1815 1816 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1817 @*/ 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1819 { 1820 PetscErrorCode ierr; 1821 PetscMPIInt size; 1822 1823 PetscFunctionBegin; 1824 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1825 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1826 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1827 if (size > 1) { 1828 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1829 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1830 } else { 1831 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1832 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatDuplicate_MPIDense" 1839 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1840 { 1841 Mat mat; 1842 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1843 PetscErrorCode ierr; 1844 1845 PetscFunctionBegin; 1846 *newmat = 0; 1847 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1848 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1849 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1850 a = (Mat_MPIDense*)mat->data; 1851 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1852 mat->factortype = A->factortype; 1853 mat->assembled = PETSC_TRUE; 1854 mat->preallocated = PETSC_TRUE; 1855 1856 mat->rmap->rstart = A->rmap->rstart; 1857 mat->rmap->rend = A->rmap->rend; 1858 a->size = oldmat->size; 1859 a->rank = oldmat->rank; 1860 mat->insertmode = NOT_SET_VALUES; 1861 a->nvec = oldmat->nvec; 1862 a->donotstash = oldmat->donotstash; 1863 1864 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1865 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1866 1867 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1868 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1869 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1870 1871 *newmat = mat; 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1877 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1878 { 1879 PetscErrorCode ierr; 1880 PetscMPIInt rank,size; 1881 PetscInt *rowners,i,m,nz,j; 1882 PetscScalar *array,*vals,*vals_ptr; 1883 MPI_Status status; 1884 1885 PetscFunctionBegin; 1886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1887 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1888 1889 /* determine ownership of all rows */ 1890 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1891 else m = newmat->rmap->n; 1892 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1893 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1894 rowners[0] = 0; 1895 for (i=2; i<=size; i++) { 1896 rowners[i] += rowners[i-1]; 1897 } 1898 1899 if (!sizesset) { 1900 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1901 } 1902 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1903 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1904 1905 if (!rank) { 1906 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1907 1908 /* read in my part of the matrix numerical values */ 1909 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1910 1911 /* insert into matrix-by row (this is why cannot directly read into array */ 1912 vals_ptr = vals; 1913 for (i=0; i<m; i++) { 1914 for (j=0; j<N; j++) { 1915 array[i + j*m] = *vals_ptr++; 1916 } 1917 } 1918 1919 /* read in other processors and ship out */ 1920 for (i=1; i<size; i++) { 1921 nz = (rowners[i+1] - rowners[i])*N; 1922 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1923 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1924 } 1925 } else { 1926 /* receive numeric values */ 1927 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1928 1929 /* receive message of values*/ 1930 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 1931 1932 /* insert into matrix-by row (this is why cannot directly read into array */ 1933 vals_ptr = vals; 1934 for (i=0; i<m; i++) { 1935 for (j=0; j<N; j++) { 1936 array[i + j*m] = *vals_ptr++; 1937 } 1938 } 1939 } 1940 ierr = PetscFree(rowners);CHKERRQ(ierr); 1941 ierr = PetscFree(vals);CHKERRQ(ierr); 1942 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1943 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatLoad_MPIDense" 1949 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1950 { 1951 PetscScalar *vals,*svals; 1952 MPI_Comm comm = ((PetscObject)viewer)->comm; 1953 MPI_Status status; 1954 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1955 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1956 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1957 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1958 int fd; 1959 PetscErrorCode ierr; 1960 1961 PetscFunctionBegin; 1962 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1963 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1964 if (!rank) { 1965 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1966 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1967 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1968 } 1969 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1970 1971 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1972 M = header[1]; N = header[2]; nz = header[3]; 1973 1974 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1975 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1976 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1977 1978 /* If global sizes are set, check if they are consistent with that given in the file */ 1979 if (sizesset) { 1980 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1981 } 1982 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1983 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1984 1985 /* 1986 Handle case where matrix is stored on disk as a dense matrix 1987 */ 1988 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1989 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 /* determine ownership of all rows */ 1994 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1995 else m = PetscMPIIntCast(newmat->rmap->n); 1996 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1997 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1998 rowners[0] = 0; 1999 for (i=2; i<=size; i++) { 2000 rowners[i] += rowners[i-1]; 2001 } 2002 rstart = rowners[rank]; 2003 rend = rowners[rank+1]; 2004 2005 /* distribute row lengths to all processors */ 2006 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2007 if (!rank) { 2008 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2009 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2010 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2011 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2012 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2013 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2014 } else { 2015 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2016 } 2017 2018 if (!rank) { 2019 /* calculate the number of nonzeros on each processor */ 2020 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2021 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2022 for (i=0; i<size; i++) { 2023 for (j=rowners[i]; j< rowners[i+1]; j++) { 2024 procsnz[i] += rowlengths[j]; 2025 } 2026 } 2027 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2028 2029 /* determine max buffer needed and allocate it */ 2030 maxnz = 0; 2031 for (i=0; i<size; i++) { 2032 maxnz = PetscMax(maxnz,procsnz[i]); 2033 } 2034 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2035 2036 /* read in my part of the matrix column indices */ 2037 nz = procsnz[0]; 2038 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2039 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2040 2041 /* read in every one elses and ship off */ 2042 for (i=1; i<size; i++) { 2043 nz = procsnz[i]; 2044 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2045 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2046 } 2047 ierr = PetscFree(cols);CHKERRQ(ierr); 2048 } else { 2049 /* determine buffer space needed for message */ 2050 nz = 0; 2051 for (i=0; i<m; i++) { 2052 nz += ourlens[i]; 2053 } 2054 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2055 2056 /* receive message of column indices*/ 2057 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2058 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2059 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2060 } 2061 2062 /* loop over local rows, determining number of off diagonal entries */ 2063 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2064 jj = 0; 2065 for (i=0; i<m; i++) { 2066 for (j=0; j<ourlens[i]; j++) { 2067 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2068 jj++; 2069 } 2070 } 2071 2072 /* create our matrix */ 2073 for (i=0; i<m; i++) { 2074 ourlens[i] -= offlens[i]; 2075 } 2076 2077 if (!sizesset) { 2078 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2079 } 2080 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2081 for (i=0; i<m; i++) { 2082 ourlens[i] += offlens[i]; 2083 } 2084 2085 if (!rank) { 2086 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2087 2088 /* read in my part of the matrix numerical values */ 2089 nz = procsnz[0]; 2090 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2091 2092 /* insert into matrix */ 2093 jj = rstart; 2094 smycols = mycols; 2095 svals = vals; 2096 for (i=0; i<m; i++) { 2097 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2098 smycols += ourlens[i]; 2099 svals += ourlens[i]; 2100 jj++; 2101 } 2102 2103 /* read in other processors and ship out */ 2104 for (i=1; i<size; i++) { 2105 nz = procsnz[i]; 2106 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2107 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2108 } 2109 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2110 } else { 2111 /* receive numeric values */ 2112 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2113 2114 /* receive message of values*/ 2115 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2116 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2117 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2118 2119 /* insert into matrix */ 2120 jj = rstart; 2121 smycols = mycols; 2122 svals = vals; 2123 for (i=0; i<m; i++) { 2124 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2125 smycols += ourlens[i]; 2126 svals += ourlens[i]; 2127 jj++; 2128 } 2129 } 2130 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2131 ierr = PetscFree(vals);CHKERRQ(ierr); 2132 ierr = PetscFree(mycols);CHKERRQ(ierr); 2133 ierr = PetscFree(rowners);CHKERRQ(ierr); 2134 2135 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatEqual_MPIDense" 2142 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2143 { 2144 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2145 Mat a,b; 2146 PetscTruth flg; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 a = matA->A; 2151 b = matB->A; 2152 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2153 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 #if defined(PETSC_HAVE_PLAPACK) 2158 2159 #undef __FUNCT__ 2160 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2161 /*@C 2162 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2163 Level: developer 2164 2165 .keywords: Petsc, destroy, package, PLAPACK 2166 .seealso: PetscFinalize() 2167 @*/ 2168 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2169 { 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = PLA_Finalize();CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2179 /*@C 2180 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2181 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2182 2183 Input Parameter: 2184 . comm - the communicator the matrix lives on 2185 2186 Level: developer 2187 2188 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2189 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2190 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2191 cannot overlap. 2192 2193 .keywords: Petsc, initialize, package, PLAPACK 2194 .seealso: PetscSysInitializePackage(), PetscInitialize() 2195 @*/ 2196 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2197 { 2198 PetscMPIInt size; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 if (!PLA_Initialized(PETSC_NULL)) { 2203 2204 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2205 Plapack_nprows = 1; 2206 Plapack_npcols = size; 2207 2208 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2209 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2210 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2211 #if defined(PETSC_USE_DEBUG) 2212 Plapack_ierror = 3; 2213 #else 2214 Plapack_ierror = 0; 2215 #endif 2216 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2217 if (Plapack_ierror){ 2218 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2219 } else { 2220 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2221 } 2222 2223 Plapack_nb_alg = 0; 2224 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2225 if (Plapack_nb_alg) { 2226 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2227 } 2228 PetscOptionsEnd(); 2229 2230 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2231 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2232 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2233 } 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #endif 2238