1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscTruth pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscTruth flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 211 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 212 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 213 214 /* No parallel redistribution currently supported! Should really check each index set 215 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 216 original matrix! */ 217 218 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 219 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 220 221 /* Check submatrix call */ 222 if (scall == MAT_REUSE_MATRIX) { 223 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 224 /* Really need to test rows and column sizes! */ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = addv; /* in case this processor had no cache */ 279 280 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 281 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 282 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 289 { 290 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 291 PetscErrorCode ierr; 292 PetscInt i,*row,*col,flg,j,rstart,ncols; 293 PetscMPIInt n; 294 PetscScalar *val; 295 InsertMode addv=mat->insertmode; 296 297 PetscFunctionBegin; 298 /* wait on receives */ 299 while (1) { 300 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscTruth found; 358 359 PetscFunctionBegin; 360 /* first count number of contributors to each processor */ 361 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 362 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 363 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 364 for (i=0; i<N; i++) { 365 idx = rows[i]; 366 found = PETSC_FALSE; 367 for (j=0; j<size; j++) { 368 if (idx >= owners[j] && idx < owners[j+1]) { 369 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 370 } 371 } 372 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 373 } 374 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 375 376 /* inform other processors of number of messages and max length*/ 377 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 378 379 /* post receives: */ 380 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 381 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 382 for (i=0; i<nrecvs; i++) { 383 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 391 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 392 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 393 starts[0] = 0; 394 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 395 for (i=0; i<N; i++) { 396 svalues[starts[owner[i]]++] = rows[i]; 397 } 398 399 starts[0] = 0; 400 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 401 count = 0; 402 for (i=0; i<size; i++) { 403 if (nprocs[2*i+1]) { 404 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscFree(starts);CHKERRQ(ierr); 408 409 base = owners[rank]; 410 411 /* wait on receives */ 412 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 413 count = nrecvs; 414 slen = 0; 415 while (count) { 416 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 417 /* unpack receives into our local space */ 418 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 419 source[imdex] = recv_status.MPI_SOURCE; 420 lens[imdex] = n; 421 slen += n; 422 count--; 423 } 424 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 425 426 /* move the data into the send scatter */ 427 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 428 count = 0; 429 for (i=0; i<nrecvs; i++) { 430 values = rvalues + i*nmax; 431 for (j=0; j<lens[i]; j++) { 432 lrows[count++] = values[j] - base; 433 } 434 } 435 ierr = PetscFree(rvalues);CHKERRQ(ierr); 436 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 437 ierr = PetscFree(owner);CHKERRQ(ierr); 438 ierr = PetscFree(nprocs);CHKERRQ(ierr); 439 440 /* actually zap the local rows */ 441 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 442 ierr = PetscFree(lrows);CHKERRQ(ierr); 443 444 /* wait on sends */ 445 if (nsends) { 446 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 447 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 448 ierr = PetscFree(send_status);CHKERRQ(ierr); 449 } 450 ierr = PetscFree(send_waits);CHKERRQ(ierr); 451 ierr = PetscFree(svalues);CHKERRQ(ierr); 452 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_MPIDense" 458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMultAdd_MPIDense" 472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultTranspose_MPIDense" 486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 487 { 488 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 489 PetscErrorCode ierr; 490 PetscScalar zero = 0.0; 491 492 PetscFunctionBegin; 493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 494 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 509 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 510 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNCT__ 516 #define __FUNCT__ "MatGetDiagonal_MPIDense" 517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 521 PetscErrorCode ierr; 522 PetscInt len,i,n,m = A->rmap->n,radd; 523 PetscScalar *x,zero = 0.0; 524 525 PetscFunctionBegin; 526 ierr = VecSet(v,zero);CHKERRQ(ierr); 527 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 528 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 529 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 530 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 531 radd = A->rmap->rstart*m; 532 for (i=0; i<len; i++) { 533 x[i] = aloc->v[radd + i*m + i]; 534 } 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatDestroy_MPIDense" 541 PetscErrorCode MatDestroy_MPIDense(Mat mat) 542 { 543 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 544 PetscErrorCode ierr; 545 #if defined(PETSC_HAVE_PLAPACK) 546 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 547 #endif 548 549 PetscFunctionBegin; 550 551 #if defined(PETSC_USE_LOG) 552 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 553 #endif 554 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 556 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 557 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 558 #if defined(PETSC_HAVE_PLAPACK) 559 if (lu) { 560 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 561 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 562 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 563 564 if (lu->is_pla) { 565 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 566 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 567 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 568 } 569 } 570 #endif 571 572 ierr = PetscFree(mdn);CHKERRQ(ierr); 573 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 MPI_Status status; 595 596 PetscFunctionBegin; 597 if (mdn->size == 1) { 598 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 601 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 602 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 603 604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 605 if (format == PETSC_VIEWER_NATIVE) { 606 607 if (!rank) { 608 /* store the matrix as a dense matrix */ 609 header[0] = MAT_FILE_CLASSID; 610 header[1] = mat->rmap->N; 611 header[2] = N; 612 header[3] = MATRIX_BINARY_FORMAT_DENSE; 613 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 614 615 /* get largest work array needed for transposing array */ 616 mmax = mat->rmap->n; 617 for (i=1; i<size; i++) { 618 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 619 } 620 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 621 622 /* write out local array, by rows */ 623 m = mat->rmap->n; 624 v = a->v; 625 for (j=0; j<N; j++) { 626 for (i=0; i<m; i++) { 627 work[j + i*N] = *v++; 628 } 629 } 630 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 631 /* get largest work array to receive messages from other processes, excludes process zero */ 632 mmax = 0; 633 for (i=1; i<size; i++) { 634 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 635 } 636 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 637 for(k = 1; k < size; k++) { 638 v = vv; 639 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 640 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 641 642 for(j = 0; j < N; j++) { 643 for(i = 0; i < m; i++) { 644 work[j + i*N] = *v++; 645 } 646 } 647 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 648 } 649 ierr = PetscFree(work);CHKERRQ(ierr); 650 ierr = PetscFree(vv);CHKERRQ(ierr); 651 } else { 652 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 } 654 } SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 655 } 656 PetscFunctionReturn(0); 657 } 658 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 661 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 662 { 663 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 664 PetscErrorCode ierr; 665 PetscMPIInt size = mdn->size,rank = mdn->rank; 666 const PetscViewerType vtype; 667 PetscTruth iascii,isdraw; 668 PetscViewer sviewer; 669 PetscViewerFormat format; 670 #if defined(PETSC_HAVE_PLAPACK) 671 Mat_Plapack *lu; 672 #endif 673 674 PetscFunctionBegin; 675 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 676 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 677 if (iascii) { 678 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 679 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 680 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 681 MatInfo info; 682 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 684 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 685 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 686 #if defined(PETSC_HAVE_PLAPACK) 687 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 688 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 689 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 690 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 691 if (mat->factortype){ 692 lu=(Mat_Plapack*)(mat->spptr); 693 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 694 } 695 #else 696 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 697 #endif 698 PetscFunctionReturn(0); 699 } else if (format == PETSC_VIEWER_ASCII_INFO) { 700 PetscFunctionReturn(0); 701 } 702 } else if (isdraw) { 703 PetscDraw draw; 704 PetscTruth isnull; 705 706 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 707 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 708 if (isnull) PetscFunctionReturn(0); 709 } 710 711 if (size == 1) { 712 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 713 } else { 714 /* assemble the entire matrix onto first processor. */ 715 Mat A; 716 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 717 PetscInt *cols; 718 PetscScalar *vals; 719 720 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 721 if (!rank) { 722 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 723 } else { 724 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 725 } 726 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 727 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 728 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 729 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 730 731 /* Copy the matrix ... This isn't the most efficient means, 732 but it's quick for now */ 733 A->insertmode = INSERT_VALUES; 734 row = mat->rmap->rstart; m = mdn->A->rmap->n; 735 for (i=0; i<m; i++) { 736 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 737 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 738 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 739 row++; 740 } 741 742 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 743 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 744 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 745 if (!rank) { 746 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 747 } 748 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 749 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 750 ierr = MatDestroy(A);CHKERRQ(ierr); 751 } 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNCT__ 756 #define __FUNCT__ "MatView_MPIDense" 757 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 758 { 759 PetscErrorCode ierr; 760 PetscTruth iascii,isbinary,isdraw,issocket; 761 762 PetscFunctionBegin; 763 764 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 765 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 766 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 767 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 768 769 if (iascii || issocket || isdraw) { 770 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 771 } else if (isbinary) { 772 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 773 } else { 774 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 775 } 776 PetscFunctionReturn(0); 777 } 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "MatGetInfo_MPIDense" 781 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 782 { 783 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 784 Mat mdn = mat->A; 785 PetscErrorCode ierr; 786 PetscReal isend[5],irecv[5]; 787 788 PetscFunctionBegin; 789 info->block_size = 1.0; 790 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 791 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 792 isend[3] = info->memory; isend[4] = info->mallocs; 793 if (flag == MAT_LOCAL) { 794 info->nz_used = isend[0]; 795 info->nz_allocated = isend[1]; 796 info->nz_unneeded = isend[2]; 797 info->memory = isend[3]; 798 info->mallocs = isend[4]; 799 } else if (flag == MAT_GLOBAL_MAX) { 800 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 801 info->nz_used = irecv[0]; 802 info->nz_allocated = irecv[1]; 803 info->nz_unneeded = irecv[2]; 804 info->memory = irecv[3]; 805 info->mallocs = irecv[4]; 806 } else if (flag == MAT_GLOBAL_SUM) { 807 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 808 info->nz_used = irecv[0]; 809 info->nz_allocated = irecv[1]; 810 info->nz_unneeded = irecv[2]; 811 info->memory = irecv[3]; 812 info->mallocs = irecv[4]; 813 } 814 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 815 info->fill_ratio_needed = 0; 816 info->factor_mallocs = 0; 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatSetOption_MPIDense" 822 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 823 { 824 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 825 PetscErrorCode ierr; 826 827 PetscFunctionBegin; 828 switch (op) { 829 case MAT_NEW_NONZERO_LOCATIONS: 830 case MAT_NEW_NONZERO_LOCATION_ERR: 831 case MAT_NEW_NONZERO_ALLOCATION_ERR: 832 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 833 break; 834 case MAT_ROW_ORIENTED: 835 a->roworiented = flg; 836 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 837 break; 838 case MAT_NEW_DIAGONALS: 839 case MAT_USE_HASH_TABLE: 840 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 841 break; 842 case MAT_IGNORE_OFF_PROC_ENTRIES: 843 a->donotstash = flg; 844 break; 845 case MAT_SYMMETRIC: 846 case MAT_STRUCTURALLY_SYMMETRIC: 847 case MAT_HERMITIAN: 848 case MAT_SYMMETRY_ETERNAL: 849 case MAT_IGNORE_LOWER_TRIANGULAR: 850 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 851 break; 852 default: 853 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 854 } 855 PetscFunctionReturn(0); 856 } 857 858 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatDiagonalScale_MPIDense" 861 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 862 { 863 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 864 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 865 PetscScalar *l,*r,x,*v; 866 PetscErrorCode ierr; 867 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 868 869 PetscFunctionBegin; 870 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 871 if (ll) { 872 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 873 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 874 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 875 for (i=0; i<m; i++) { 876 x = l[i]; 877 v = mat->v + i; 878 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 879 } 880 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 881 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 882 } 883 if (rr) { 884 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 885 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 886 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 889 for (i=0; i<n; i++) { 890 x = r[i]; 891 v = mat->v + i*m; 892 for (j=0; j<m; j++) { (*v++) *= x;} 893 } 894 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 895 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 896 } 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatNorm_MPIDense" 902 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 903 { 904 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 905 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 906 PetscErrorCode ierr; 907 PetscInt i,j; 908 PetscReal sum = 0.0; 909 PetscScalar *v = mat->v; 910 911 PetscFunctionBegin; 912 if (mdn->size == 1) { 913 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 914 } else { 915 if (type == NORM_FROBENIUS) { 916 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 917 #if defined(PETSC_USE_COMPLEX) 918 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 919 #else 920 sum += (*v)*(*v); v++; 921 #endif 922 } 923 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 924 *nrm = sqrt(*nrm); 925 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 926 } else if (type == NORM_1) { 927 PetscReal *tmp,*tmp2; 928 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 929 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 930 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 931 *nrm = 0.0; 932 v = mat->v; 933 for (j=0; j<mdn->A->cmap->n; j++) { 934 for (i=0; i<mdn->A->rmap->n; i++) { 935 tmp[j] += PetscAbsScalar(*v); v++; 936 } 937 } 938 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 939 for (j=0; j<A->cmap->N; j++) { 940 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 941 } 942 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 943 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 944 } else if (type == NORM_INFINITY) { /* max row norm */ 945 PetscReal ntemp; 946 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 947 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 948 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 949 } 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatTranspose_MPIDense" 955 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 956 { 957 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 958 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 959 Mat B; 960 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 961 PetscErrorCode ierr; 962 PetscInt j,i; 963 PetscScalar *v; 964 965 PetscFunctionBegin; 966 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 967 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 968 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 969 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 970 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 971 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 972 } else { 973 B = *matout; 974 } 975 976 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 977 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 978 for (i=0; i<m; i++) rwork[i] = rstart + i; 979 for (j=0; j<n; j++) { 980 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 981 v += m; 982 } 983 ierr = PetscFree(rwork);CHKERRQ(ierr); 984 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 985 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 986 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 987 *matout = B; 988 } else { 989 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 995 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 996 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 997 998 #undef __FUNCT__ 999 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1000 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1001 { 1002 PetscErrorCode ierr; 1003 1004 PetscFunctionBegin; 1005 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #if defined(PETSC_HAVE_PLAPACK) 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1013 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1014 { 1015 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1016 PetscErrorCode ierr; 1017 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1018 PetscScalar *array; 1019 PetscReal one = 1.0; 1020 1021 PetscFunctionBegin; 1022 /* Copy A into F->lu->A */ 1023 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1024 ierr = PLA_API_begin();CHKERRQ(ierr); 1025 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1026 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1027 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1028 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1029 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1030 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1031 ierr = PLA_API_end();CHKERRQ(ierr); 1032 lu->rstart = rstart; 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1038 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1039 { 1040 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1041 PetscErrorCode ierr; 1042 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1043 PetscScalar *array; 1044 PetscReal one = 1.0; 1045 1046 PetscFunctionBegin; 1047 /* Copy F into A->lu->A */ 1048 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1049 ierr = PLA_API_begin();CHKERRQ(ierr); 1050 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1051 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1052 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1053 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1054 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1055 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1056 ierr = PLA_API_end();CHKERRQ(ierr); 1057 lu->rstart = rstart; 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1063 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1064 { 1065 PetscErrorCode ierr; 1066 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1067 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1068 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1069 PLA_Obj alpha = NULL,beta = NULL; 1070 1071 PetscFunctionBegin; 1072 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1073 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1074 1075 /* 1076 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1077 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1078 */ 1079 1080 /* do the multiply in PLA */ 1081 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1082 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1083 CHKMEMQ; 1084 1085 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1086 CHKMEMQ; 1087 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1088 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1089 1090 /* 1091 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1092 */ 1093 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1099 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1100 { 1101 PetscErrorCode ierr; 1102 PetscInt m=A->rmap->n,n=B->cmap->n; 1103 Mat Cmat; 1104 1105 PetscFunctionBegin; 1106 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1107 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1108 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1109 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1110 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1111 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1112 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1113 1114 *C = Cmat; 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1120 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1121 { 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 if (scall == MAT_INITIAL_MATRIX){ 1126 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1127 } 1128 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatSolve_MPIDense" 1134 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1135 { 1136 MPI_Comm comm = ((PetscObject)A)->comm; 1137 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1138 PetscErrorCode ierr; 1139 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1140 PetscScalar *array; 1141 PetscReal one = 1.0; 1142 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1143 PLA_Obj v_pla = NULL; 1144 PetscScalar *loc_buf; 1145 Vec loc_x; 1146 1147 PetscFunctionBegin; 1148 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1149 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1150 1151 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1152 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1153 PLA_Obj_set_to_zero(v_pla); 1154 1155 /* Copy b into rhs_pla */ 1156 PLA_API_begin(); 1157 PLA_Obj_API_open(v_pla); 1158 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1159 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1160 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1161 PLA_Obj_API_close(v_pla); 1162 PLA_API_end(); 1163 1164 if (A->factortype == MAT_FACTOR_LU){ 1165 /* Apply the permutations to the right hand sides */ 1166 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1167 1168 /* Solve L y = b, overwriting b with y */ 1169 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1170 1171 /* Solve U x = y (=b), overwriting b with x */ 1172 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1173 } else { /* MAT_FACTOR_CHOLESKY */ 1174 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1175 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1176 PLA_NONUNIT_DIAG,lu->A,v_pla); 1177 } 1178 1179 /* Copy PLAPACK x into Petsc vector x */ 1180 PLA_Obj_local_length(v_pla, &loc_m); 1181 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1182 PLA_Obj_local_stride(v_pla, &loc_stride); 1183 /* 1184 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1185 */ 1186 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1187 if (!lu->pla_solved){ 1188 1189 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1190 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1191 1192 /* Create IS and cts for VecScatterring */ 1193 PLA_Obj_local_length(v_pla, &loc_m); 1194 PLA_Obj_local_stride(v_pla, &loc_stride); 1195 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1196 1197 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1198 for (i=0; i<loc_m; i+=lu->nb){ 1199 j = 0; 1200 while (j < lu->nb && i+j < loc_m){ 1201 idx_petsc[i+j] = rstart + j; j++; 1202 } 1203 rstart += size*lu->nb; 1204 } 1205 1206 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1207 1208 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1209 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1210 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1211 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1212 } 1213 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1215 1216 /* Free data */ 1217 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1218 PLA_Obj_free(&v_pla); 1219 1220 lu->pla_solved = PETSC_TRUE; 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1226 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1227 { 1228 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1229 PetscErrorCode ierr; 1230 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1231 PetscInt info_pla=0; 1232 PetscScalar *array,one = 1.0; 1233 1234 PetscFunctionBegin; 1235 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1236 PLA_Obj_free(&lu->A); 1237 PLA_Obj_free (&lu->pivots); 1238 } 1239 /* Create PLAPACK matrix object */ 1240 lu->A = NULL; lu->pivots = NULL; 1241 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1242 PLA_Obj_set_to_zero(lu->A); 1243 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1244 1245 /* Copy A into lu->A */ 1246 PLA_API_begin(); 1247 PLA_Obj_API_open(lu->A); 1248 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1249 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1250 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1251 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1252 PLA_Obj_API_close(lu->A); 1253 PLA_API_end(); 1254 1255 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1256 info_pla = PLA_LU(lu->A,lu->pivots); 1257 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1258 1259 lu->rstart = rstart; 1260 lu->mstruct = SAME_NONZERO_PATTERN; 1261 F->ops->solve = MatSolve_MPIDense; 1262 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1268 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1269 { 1270 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1271 PetscErrorCode ierr; 1272 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1273 PetscInt info_pla=0; 1274 PetscScalar *array,one = 1.0; 1275 1276 PetscFunctionBegin; 1277 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1278 PLA_Obj_free(&lu->A); 1279 } 1280 /* Create PLAPACK matrix object */ 1281 lu->A = NULL; 1282 lu->pivots = NULL; 1283 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1284 1285 /* Copy A into lu->A */ 1286 PLA_API_begin(); 1287 PLA_Obj_API_open(lu->A); 1288 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1289 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1290 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1291 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1292 PLA_Obj_API_close(lu->A); 1293 PLA_API_end(); 1294 1295 /* Factor P A -> Chol */ 1296 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1297 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1298 1299 lu->rstart = rstart; 1300 lu->mstruct = SAME_NONZERO_PATTERN; 1301 F->ops->solve = MatSolve_MPIDense; 1302 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1303 PetscFunctionReturn(0); 1304 } 1305 1306 /* Note the Petsc perm permutation is ignored */ 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1309 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1310 { 1311 PetscErrorCode ierr; 1312 PetscTruth issymmetric,set; 1313 1314 PetscFunctionBegin; 1315 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1316 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1317 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /* Note the Petsc r and c permutations are ignored */ 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1324 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1325 { 1326 PetscErrorCode ierr; 1327 PetscInt M = A->rmap->N; 1328 Mat_Plapack *lu; 1329 1330 PetscFunctionBegin; 1331 lu = (Mat_Plapack*)F->spptr; 1332 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1333 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 EXTERN_C_BEGIN 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1340 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1341 { 1342 PetscFunctionBegin; 1343 *type = MAT_SOLVER_PLAPACK; 1344 PetscFunctionReturn(0); 1345 } 1346 EXTERN_C_END 1347 1348 EXTERN_C_BEGIN 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1351 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1352 { 1353 PetscErrorCode ierr; 1354 Mat_Plapack *lu; 1355 PetscMPIInt size; 1356 PetscInt M=A->rmap->N; 1357 1358 PetscFunctionBegin; 1359 /* Create the factorization matrix */ 1360 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1361 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1362 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1363 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1364 (*F)->spptr = (void*)lu; 1365 1366 /* Set default Plapack parameters */ 1367 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1368 lu->nb = M/size; 1369 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1370 1371 /* Set runtime options */ 1372 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1373 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1374 PetscOptionsEnd(); 1375 1376 /* Create object distribution template */ 1377 lu->templ = NULL; 1378 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1379 1380 /* Set the datatype */ 1381 #if defined(PETSC_USE_COMPLEX) 1382 lu->datatype = MPI_DOUBLE_COMPLEX; 1383 #else 1384 lu->datatype = MPI_DOUBLE; 1385 #endif 1386 1387 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1388 1389 1390 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1391 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1392 1393 if (ftype == MAT_FACTOR_LU) { 1394 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1395 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1396 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1397 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1398 (*F)->factortype = ftype; 1399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 EXTERN_C_END 1403 #endif 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1407 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1408 { 1409 #if defined(PETSC_HAVE_PLAPACK) 1410 PetscErrorCode ierr; 1411 #endif 1412 1413 PetscFunctionBegin; 1414 #if defined(PETSC_HAVE_PLAPACK) 1415 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1416 #else 1417 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1418 #endif 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatAXPY_MPIDense" 1424 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1425 { 1426 PetscErrorCode ierr; 1427 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1428 1429 PetscFunctionBegin; 1430 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatConjugate_MPIDense" 1436 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1437 { 1438 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1439 PetscErrorCode ierr; 1440 1441 PetscFunctionBegin; 1442 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatRealPart_MPIDense" 1448 PetscErrorCode MatRealPart_MPIDense(Mat A) 1449 { 1450 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1451 PetscErrorCode ierr; 1452 1453 PetscFunctionBegin; 1454 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1460 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1461 { 1462 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1463 PetscErrorCode ierr; 1464 1465 PetscFunctionBegin; 1466 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 /* -------------------------------------------------------------------*/ 1471 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1472 MatGetRow_MPIDense, 1473 MatRestoreRow_MPIDense, 1474 MatMult_MPIDense, 1475 /* 4*/ MatMultAdd_MPIDense, 1476 MatMultTranspose_MPIDense, 1477 MatMultTransposeAdd_MPIDense, 1478 0, 1479 0, 1480 0, 1481 /*10*/ 0, 1482 0, 1483 0, 1484 0, 1485 MatTranspose_MPIDense, 1486 /*15*/ MatGetInfo_MPIDense, 1487 MatEqual_MPIDense, 1488 MatGetDiagonal_MPIDense, 1489 MatDiagonalScale_MPIDense, 1490 MatNorm_MPIDense, 1491 /*20*/ MatAssemblyBegin_MPIDense, 1492 MatAssemblyEnd_MPIDense, 1493 MatSetOption_MPIDense, 1494 MatZeroEntries_MPIDense, 1495 /*24*/ MatZeroRows_MPIDense, 1496 0, 1497 0, 1498 0, 1499 0, 1500 /*29*/ MatSetUpPreallocation_MPIDense, 1501 0, 1502 0, 1503 MatGetArray_MPIDense, 1504 MatRestoreArray_MPIDense, 1505 /*34*/ MatDuplicate_MPIDense, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*39*/ MatAXPY_MPIDense, 1511 MatGetSubMatrices_MPIDense, 1512 0, 1513 MatGetValues_MPIDense, 1514 0, 1515 /*44*/ 0, 1516 MatScale_MPIDense, 1517 0, 1518 0, 1519 0, 1520 /*49*/ 0, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*54*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*59*/ MatGetSubMatrix_MPIDense, 1531 MatDestroy_MPIDense, 1532 MatView_MPIDense, 1533 0, 1534 0, 1535 /*64*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*69*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*74*/ 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*79*/ 0, 1551 0, 1552 0, 1553 0, 1554 /*83*/ MatLoad_MPIDense, 1555 0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*89*/ 1561 #if defined(PETSC_HAVE_PLAPACK) 1562 MatMatMult_MPIDense_MPIDense, 1563 MatMatMultSymbolic_MPIDense_MPIDense, 1564 MatMatMultNumeric_MPIDense_MPIDense, 1565 #else 1566 0, 1567 0, 1568 0, 1569 #endif 1570 0, 1571 0, 1572 /*94*/ 0, 1573 0, 1574 0, 1575 0, 1576 0, 1577 /*99*/ 0, 1578 0, 1579 0, 1580 MatConjugate_MPIDense, 1581 0, 1582 /*104*/0, 1583 MatRealPart_MPIDense, 1584 MatImaginaryPart_MPIDense, 1585 0 1586 }; 1587 1588 EXTERN_C_BEGIN 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1591 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1592 { 1593 Mat_MPIDense *a; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 mat->preallocated = PETSC_TRUE; 1598 /* Note: For now, when data is specified above, this assumes the user correctly 1599 allocates the local dense storage space. We should add error checking. */ 1600 1601 a = (Mat_MPIDense*)mat->data; 1602 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1603 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1604 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1605 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1606 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 EXTERN_C_END 1610 1611 /*MC 1612 MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1613 1614 run ./configure with the option --download-plapack 1615 1616 1617 Options Database Keys: 1618 . -mat_plapack_nprows <n> - number of rows in processor partition 1619 . -mat_plapack_npcols <n> - number of columns in processor partition 1620 . -mat_plapack_nb <n> - block size of template vector 1621 . -mat_plapack_nb_alg <n> - algorithmic block size 1622 - -mat_plapack_ckerror <n> - error checking flag 1623 1624 Level: intermediate 1625 1626 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1627 1628 M*/ 1629 1630 EXTERN_C_BEGIN 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatCreate_MPIDense" 1633 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1634 { 1635 Mat_MPIDense *a; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1640 mat->data = (void*)a; 1641 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1642 mat->mapping = 0; 1643 1644 mat->insertmode = NOT_SET_VALUES; 1645 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1646 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1647 1648 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1649 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1650 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1651 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1652 a->nvec = mat->cmap->n; 1653 1654 /* build cache for off array entries formed */ 1655 a->donotstash = PETSC_FALSE; 1656 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1657 1658 /* stuff used for matrix vector multiply */ 1659 a->lvec = 0; 1660 a->Mvctx = 0; 1661 a->roworiented = PETSC_TRUE; 1662 1663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1664 "MatGetDiagonalBlock_MPIDense", 1665 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1667 "MatMPIDenseSetPreallocation_MPIDense", 1668 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1670 "MatMatMult_MPIAIJ_MPIDense", 1671 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1672 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1673 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1674 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1675 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1676 "MatMatMultNumeric_MPIAIJ_MPIDense", 1677 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1678 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1679 "MatGetFactor_mpidense_petsc", 1680 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1681 #if defined(PETSC_HAVE_PLAPACK) 1682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1683 "MatGetFactor_mpidense_plapack", 1684 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1685 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1686 #endif 1687 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1688 1689 PetscFunctionReturn(0); 1690 } 1691 EXTERN_C_END 1692 1693 /*MC 1694 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1695 1696 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1697 and MATMPIDENSE otherwise. 1698 1699 Options Database Keys: 1700 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1701 1702 Level: beginner 1703 1704 1705 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1706 M*/ 1707 1708 EXTERN_C_BEGIN 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatCreate_Dense" 1711 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1712 { 1713 PetscErrorCode ierr; 1714 PetscMPIInt size; 1715 1716 PetscFunctionBegin; 1717 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1718 if (size == 1) { 1719 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1720 } else { 1721 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1722 } 1723 PetscFunctionReturn(0); 1724 } 1725 EXTERN_C_END 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1729 /*@C 1730 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1731 1732 Not collective 1733 1734 Input Parameters: 1735 . A - the matrix 1736 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1737 to control all matrix memory allocation. 1738 1739 Notes: 1740 The dense format is fully compatible with standard Fortran 77 1741 storage by columns. 1742 1743 The data input variable is intended primarily for Fortran programmers 1744 who wish to allocate their own matrix memory space. Most users should 1745 set data=PETSC_NULL. 1746 1747 Level: intermediate 1748 1749 .keywords: matrix,dense, parallel 1750 1751 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1752 @*/ 1753 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1754 { 1755 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1756 1757 PetscFunctionBegin; 1758 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1759 if (f) { 1760 ierr = (*f)(mat,data);CHKERRQ(ierr); 1761 } 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatCreateMPIDense" 1767 /*@C 1768 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1769 1770 Collective on MPI_Comm 1771 1772 Input Parameters: 1773 + comm - MPI communicator 1774 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1775 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1776 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1777 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1778 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1779 to control all matrix memory allocation. 1780 1781 Output Parameter: 1782 . A - the matrix 1783 1784 Notes: 1785 The dense format is fully compatible with standard Fortran 77 1786 storage by columns. 1787 1788 The data input variable is intended primarily for Fortran programmers 1789 who wish to allocate their own matrix memory space. Most users should 1790 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1791 1792 The user MUST specify either the local or global matrix dimensions 1793 (possibly both). 1794 1795 Level: intermediate 1796 1797 .keywords: matrix,dense, parallel 1798 1799 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1800 @*/ 1801 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1802 { 1803 PetscErrorCode ierr; 1804 PetscMPIInt size; 1805 1806 PetscFunctionBegin; 1807 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1808 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1810 if (size > 1) { 1811 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1812 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1813 } else { 1814 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1815 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1816 } 1817 PetscFunctionReturn(0); 1818 } 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatDuplicate_MPIDense" 1822 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1823 { 1824 Mat mat; 1825 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 *newmat = 0; 1830 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1831 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1832 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1833 a = (Mat_MPIDense*)mat->data; 1834 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1835 mat->factortype = A->factortype; 1836 mat->assembled = PETSC_TRUE; 1837 mat->preallocated = PETSC_TRUE; 1838 1839 mat->rmap->rstart = A->rmap->rstart; 1840 mat->rmap->rend = A->rmap->rend; 1841 a->size = oldmat->size; 1842 a->rank = oldmat->rank; 1843 mat->insertmode = NOT_SET_VALUES; 1844 a->nvec = oldmat->nvec; 1845 a->donotstash = oldmat->donotstash; 1846 1847 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1848 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1849 1850 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1851 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1852 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1853 1854 *newmat = mat; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1860 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1861 { 1862 PetscErrorCode ierr; 1863 PetscMPIInt rank,size; 1864 PetscInt *rowners,i,m,nz,j; 1865 PetscScalar *array,*vals,*vals_ptr; 1866 MPI_Status status; 1867 1868 PetscFunctionBegin; 1869 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1871 1872 /* determine ownership of all rows */ 1873 m = M/size + ((M % size) > rank); 1874 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1875 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1876 rowners[0] = 0; 1877 for (i=2; i<=size; i++) { 1878 rowners[i] += rowners[i-1]; 1879 } 1880 1881 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1882 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1883 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1884 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1885 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1886 1887 if (!rank) { 1888 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1889 1890 /* read in my part of the matrix numerical values */ 1891 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1892 1893 /* insert into matrix-by row (this is why cannot directly read into array */ 1894 vals_ptr = vals; 1895 for (i=0; i<m; i++) { 1896 for (j=0; j<N; j++) { 1897 array[i + j*m] = *vals_ptr++; 1898 } 1899 } 1900 1901 /* read in other processors and ship out */ 1902 for (i=1; i<size; i++) { 1903 nz = (rowners[i+1] - rowners[i])*N; 1904 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1905 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1906 } 1907 } else { 1908 /* receive numeric values */ 1909 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1910 1911 /* receive message of values*/ 1912 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1913 1914 /* insert into matrix-by row (this is why cannot directly read into array */ 1915 vals_ptr = vals; 1916 for (i=0; i<m; i++) { 1917 for (j=0; j<N; j++) { 1918 array[i + j*m] = *vals_ptr++; 1919 } 1920 } 1921 } 1922 ierr = PetscFree(rowners);CHKERRQ(ierr); 1923 ierr = PetscFree(vals);CHKERRQ(ierr); 1924 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1925 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatLoad_MPIDense" 1931 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1932 { 1933 Mat A; 1934 PetscScalar *vals,*svals; 1935 MPI_Comm comm = ((PetscObject)viewer)->comm; 1936 MPI_Status status; 1937 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1938 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1939 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1940 PetscInt i,nz,j,rstart,rend; 1941 int fd; 1942 PetscErrorCode ierr; 1943 1944 PetscFunctionBegin; 1945 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1946 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1947 if (!rank) { 1948 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1949 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1950 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1951 } 1952 1953 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1954 M = header[1]; N = header[2]; nz = header[3]; 1955 1956 /* 1957 Handle case where matrix is stored on disk as a dense matrix 1958 */ 1959 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1960 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 /* determine ownership of all rows */ 1965 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1966 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1967 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1968 rowners[0] = 0; 1969 for (i=2; i<=size; i++) { 1970 rowners[i] += rowners[i-1]; 1971 } 1972 rstart = rowners[rank]; 1973 rend = rowners[rank+1]; 1974 1975 /* distribute row lengths to all processors */ 1976 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1977 if (!rank) { 1978 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1979 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1980 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1981 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1982 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1983 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1984 } else { 1985 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1986 } 1987 1988 if (!rank) { 1989 /* calculate the number of nonzeros on each processor */ 1990 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1991 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (i=0; i<size; i++) { 1993 for (j=rowners[i]; j< rowners[i+1]; j++) { 1994 procsnz[i] += rowlengths[j]; 1995 } 1996 } 1997 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1998 1999 /* determine max buffer needed and allocate it */ 2000 maxnz = 0; 2001 for (i=0; i<size; i++) { 2002 maxnz = PetscMax(maxnz,procsnz[i]); 2003 } 2004 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2005 2006 /* read in my part of the matrix column indices */ 2007 nz = procsnz[0]; 2008 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2009 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2010 2011 /* read in every one elses and ship off */ 2012 for (i=1; i<size; i++) { 2013 nz = procsnz[i]; 2014 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2015 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2016 } 2017 ierr = PetscFree(cols);CHKERRQ(ierr); 2018 } else { 2019 /* determine buffer space needed for message */ 2020 nz = 0; 2021 for (i=0; i<m; i++) { 2022 nz += ourlens[i]; 2023 } 2024 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2025 2026 /* receive message of column indices*/ 2027 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2028 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2029 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2030 } 2031 2032 /* loop over local rows, determining number of off diagonal entries */ 2033 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2034 jj = 0; 2035 for (i=0; i<m; i++) { 2036 for (j=0; j<ourlens[i]; j++) { 2037 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2038 jj++; 2039 } 2040 } 2041 2042 /* create our matrix */ 2043 for (i=0; i<m; i++) { 2044 ourlens[i] -= offlens[i]; 2045 } 2046 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2047 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2048 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2049 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2050 A = *newmat; 2051 for (i=0; i<m; i++) { 2052 ourlens[i] += offlens[i]; 2053 } 2054 2055 if (!rank) { 2056 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2057 2058 /* read in my part of the matrix numerical values */ 2059 nz = procsnz[0]; 2060 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2061 2062 /* insert into matrix */ 2063 jj = rstart; 2064 smycols = mycols; 2065 svals = vals; 2066 for (i=0; i<m; i++) { 2067 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2068 smycols += ourlens[i]; 2069 svals += ourlens[i]; 2070 jj++; 2071 } 2072 2073 /* read in other processors and ship out */ 2074 for (i=1; i<size; i++) { 2075 nz = procsnz[i]; 2076 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2077 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2078 } 2079 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2080 } else { 2081 /* receive numeric values */ 2082 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2083 2084 /* receive message of values*/ 2085 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2086 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2087 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2088 2089 /* insert into matrix */ 2090 jj = rstart; 2091 smycols = mycols; 2092 svals = vals; 2093 for (i=0; i<m; i++) { 2094 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2095 smycols += ourlens[i]; 2096 svals += ourlens[i]; 2097 jj++; 2098 } 2099 } 2100 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2101 ierr = PetscFree(vals);CHKERRQ(ierr); 2102 ierr = PetscFree(mycols);CHKERRQ(ierr); 2103 ierr = PetscFree(rowners);CHKERRQ(ierr); 2104 2105 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2106 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatEqual_MPIDense" 2112 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2113 { 2114 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2115 Mat a,b; 2116 PetscTruth flg; 2117 PetscErrorCode ierr; 2118 2119 PetscFunctionBegin; 2120 a = matA->A; 2121 b = matB->A; 2122 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2123 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2124 PetscFunctionReturn(0); 2125 } 2126 2127 #if defined(PETSC_HAVE_PLAPACK) 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2131 /*@C 2132 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2133 Level: developer 2134 2135 .keywords: Petsc, destroy, package, PLAPACK 2136 .seealso: PetscFinalize() 2137 @*/ 2138 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2139 { 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 ierr = PLA_Finalize();CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2149 /*@C 2150 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2151 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2152 2153 Input Parameter: 2154 . comm - the communicator the matrix lives on 2155 2156 Level: developer 2157 2158 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2159 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2160 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2161 cannot overlap. 2162 2163 .keywords: Petsc, initialize, package, PLAPACK 2164 .seealso: PetscInitializePackage(), PetscInitialize() 2165 @*/ 2166 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2167 { 2168 PetscMPIInt size; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 if (!PLA_Initialized(PETSC_NULL)) { 2173 2174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2175 Plapack_nprows = 1; 2176 Plapack_npcols = size; 2177 2178 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2179 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2180 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2181 #if defined(PETSC_USE_DEBUG) 2182 Plapack_ierror = 3; 2183 #else 2184 Plapack_ierror = 0; 2185 #endif 2186 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2187 if (Plapack_ierror){ 2188 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2189 } else { 2190 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2191 } 2192 2193 Plapack_nb_alg = 0; 2194 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2195 if (Plapack_nb_alg) { 2196 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2197 } 2198 PetscOptionsEnd(); 2199 2200 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2201 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2202 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2203 } 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #endif 2208