1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 8 #if defined(PETSC_HAVE_PLAPACK) 9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 10 static MPI_Comm Plapack_comm_2d; 11 EXTERN_C_BEGIN 12 #include "PLA.h" 13 EXTERN_C_END 14 15 typedef struct { 16 PLA_Obj A,pivots; 17 PLA_Template templ; 18 MPI_Datatype datatype; 19 PetscInt nb,rstart; 20 VecScatter ctx; 21 IS is_pla,is_petsc; 22 PetscBool pla_solved; 23 MatStructure mstruct; 24 } Mat_Plapack; 25 #endif 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatDenseGetLocalMatrix" 29 /*@ 30 31 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 32 matrix that represents the operator. For sequential matrices it returns itself. 33 34 Input Parameter: 35 . A - the Seq or MPI dense matrix 36 37 Output Parameter: 38 . B - the inner matrix 39 40 Level: intermediate 41 42 @*/ 43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscBool flg; 48 49 PetscFunctionBegin; 50 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 51 if (flg) { 52 *B = mat->A; 53 } else { 54 *B = A; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatGetRow_MPIDense" 61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 62 { 63 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 66 67 PetscFunctionBegin; 68 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 69 lrow = row - rstart; 70 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatRestoreRow_MPIDense" 76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 82 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 83 PetscFunctionReturn(0); 84 } 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 89 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *B) 90 { 91 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 92 PetscErrorCode ierr; 93 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 94 PetscScalar *array; 95 MPI_Comm comm; 96 97 PetscFunctionBegin; 98 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 99 100 /* The reuse aspect is not implemented efficiently */ 101 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 102 103 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 104 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 105 ierr = MatCreate(comm,B);CHKERRQ(ierr); 106 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 107 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 108 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 109 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 110 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 111 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 113 *iscopy = PETSC_TRUE; 114 PetscFunctionReturn(0); 115 } 116 EXTERN_C_END 117 118 #undef __FUNCT__ 119 #define 
__FUNCT__ "MatSetValues_MPIDense" 120 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 123 PetscErrorCode ierr; 124 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 125 PetscBool roworiented = A->roworiented; 126 127 PetscFunctionBegin; 128 if (v) PetscValidScalarPointer(v,6); 129 for (i=0; i<m; i++) { 130 if (idxm[i] < 0) continue; 131 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 132 if (idxm[i] >= rstart && idxm[i] < rend) { 133 row = idxm[i] - rstart; 134 if (roworiented) { 135 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 136 } else { 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 141 } 142 } 143 } else { 144 if (!A->donotstash) { 145 if (roworiented) { 146 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 147 } else { 148 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 149 } 150 } 151 } 152 } 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatGetValues_MPIDense" 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscErrorCode ierr; 162 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 163 164 PetscFunctionBegin; 165 for (i=0; i<m; i++) { 166 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 167 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 168 if (idxm[i] >= rstart && idxm[i] < rend) { 169 row = idxm[i] - rstart; 170 for (j=0; j<n; j++) { 171 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 172 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 173 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 174 } 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatGetArray_MPIDense" 182 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 183 { 184 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 185 PetscErrorCode ierr; 186 187 PetscFunctionBegin; 188 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 194 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 195 { 196 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 197 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 198 PetscErrorCode ierr; 199 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 200 const PetscInt *irow,*icol; 201 PetscScalar *av,*bv,*v = lmat->v; 202 Mat newmat; 203 IS iscol_local; 204 205 PetscFunctionBegin; 206 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 207 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 208 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 209 ierr = 
ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same partitioning as the
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space; the column indices were obtained from iscol_local, so restore and destroy it (not iscol) */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
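/*
   Usage sketch for MatGetSubMatrix() on a MATMPIDENSE matrix (illustrative only, not part of
   this file's API): the row index set must follow the row partitioning of the original matrix,
   since no parallel redistribution is done above.  The column range below is a hypothetical example.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(((PetscObject)A)->comm,A->rmap->n,A->rmap->rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,ncols_local,first_col,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/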
#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr =
PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 392 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 393 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 394 starts[0] = 0; 395 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 396 for (i=0; i<N; i++) { 397 svalues[starts[owner[i]]++] = rows[i]; 398 } 399 400 starts[0] = 0; 401 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 402 count = 0; 403 for (i=0; i<size; i++) { 404 if (nprocs[2*i+1]) { 405 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 406 } 407 } 408 ierr = PetscFree(starts);CHKERRQ(ierr); 409 410 base = owners[rank]; 411 412 /* wait on receives */ 413 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 414 count = nrecvs; 415 slen = 0; 416 while (count) { 417 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 418 /* unpack receives into our local space */ 419 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 420 source[imdex] = recv_status.MPI_SOURCE; 421 lens[imdex] = n; 422 slen += n; 423 count--; 424 } 425 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 426 427 /* move the data into the send scatter */ 428 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 429 count = 0; 430 for (i=0; i<nrecvs; i++) { 431 values = rvalues + i*nmax; 432 for (j=0; j<lens[i]; j++) { 433 lrows[count++] = values[j] - base; 434 } 435 } 436 ierr = PetscFree(rvalues);CHKERRQ(ierr); 437 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 438 ierr = PetscFree(owner);CHKERRQ(ierr); 439 ierr = PetscFree(nprocs);CHKERRQ(ierr); 440 441 /* fix right hand side if needed */ 442 if (x && b) { 443 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 444 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 445 for (i=0; i<slen; i++) { 446 bb[lrows[i]] = diag*xx[lrows[i]]; 447 } 448 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 449 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 450 } 451 452 /* actually zap the local rows */ 453 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 454 ierr = PetscFree(lrows);CHKERRQ(ierr); 455 456 /* wait on sends */ 457 if (nsends) { 458 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 459 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 460 ierr = PetscFree(send_status);CHKERRQ(ierr); 461 } 462 ierr = PetscFree(send_waits);CHKERRQ(ierr); 463 ierr = PetscFree(svalues);CHKERRQ(ierr); 464 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatMult_MPIDense" 470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 471 { 472 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 473 PetscErrorCode ierr; 474 475 PetscFunctionBegin; 476 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatMultAdd_MPIDense" 484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = 
MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatMultTranspose_MPIDense" 498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 499 { 500 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 501 PetscErrorCode ierr; 502 PetscScalar zero = 0.0; 503 504 PetscFunctionBegin; 505 ierr = VecSet(yy,zero);CHKERRQ(ierr); 506 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 507 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 508 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 PetscErrorCode ierr; 518 519 PetscFunctionBegin; 520 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 521 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 522 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 523 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatGetDiagonal_MPIDense" 529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 532 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 533 PetscErrorCode ierr; 534 PetscInt len,i,n,m = A->rmap->n,radd; 535 PetscScalar *x,zero = 0.0; 536 537 PetscFunctionBegin; 538 ierr = VecSet(v,zero);CHKERRQ(ierr); 539 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 540 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 541 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 542 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 543 radd = A->rmap->rstart*m; 544 for (i=0; i<len; i++) { 545 x[i] = aloc->v[radd + i*m + i]; 546 } 547 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatDestroy_MPIDense" 553 PetscErrorCode MatDestroy_MPIDense(Mat mat) 554 { 555 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 556 PetscErrorCode ierr; 557 #if defined(PETSC_HAVE_PLAPACK) 558 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 559 #endif 560 561 PetscFunctionBegin; 562 563 #if defined(PETSC_USE_LOG) 564 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 565 #endif 566 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 567 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 568 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 569 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 570 #if defined(PETSC_HAVE_PLAPACK) 571 if (lu) { 572 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 573 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 574 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 575 576 if (lu->is_pla) { 577 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 578 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 579 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 580 } 581 } 582 #endif 583 584 ierr = PetscFree(mdn);CHKERRQ(ierr); 585 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 588 
ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatView_MPIDense_Binary" 596 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 597 { 598 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 599 PetscErrorCode ierr; 600 PetscViewerFormat format; 601 int fd; 602 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 603 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 604 PetscScalar *work,*v,*vv; 605 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 606 MPI_Status status; 607 608 PetscFunctionBegin; 609 if (mdn->size == 1) { 610 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 611 } else { 612 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 613 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 614 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 615 616 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 617 if (format == PETSC_VIEWER_NATIVE) { 618 619 if (!rank) { 620 /* store the matrix as a dense matrix */ 621 header[0] = MAT_FILE_CLASSID; 622 header[1] = mat->rmap->N; 623 header[2] = N; 624 header[3] = MATRIX_BINARY_FORMAT_DENSE; 625 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 626 627 /* get largest work array needed for transposing array */ 628 mmax = mat->rmap->n; 629 for (i=1; i<size; i++) { 630 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 631 } 632 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 633 634 /* write out local array, by rows */ 635 m = mat->rmap->n; 636 v = a->v; 637 for (j=0; j<N; j++) { 638 for (i=0; i<m; i++) { 639 work[j + i*N] = *v++; 640 } 641 } 642 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 643 /* get largest work array to receive messages from other processes, excludes process zero */ 644 mmax = 0; 645 for (i=1; i<size; i++) { 646 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 647 } 648 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 649 for(k = 1; k < size; k++) { 650 v = vv; 651 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 652 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 653 654 for(j = 0; j < N; j++) { 655 for(i = 0; i < m; i++) { 656 work[j + i*N] = *v++; 657 } 658 } 659 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 660 } 661 ierr = PetscFree(work);CHKERRQ(ierr); 662 ierr = PetscFree(vv);CHKERRQ(ierr); 663 } else { 664 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 665 } 666 } else { 667 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 668 } 669 } 670 PetscFunctionReturn(0); 671 } 672 673 #undef __FUNCT__ 674 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 675 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 676 { 677 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 678 PetscErrorCode ierr; 679 PetscMPIInt size = mdn->size,rank = mdn->rank; 680 const PetscViewerType vtype; 681 PetscBool 
iascii,isdraw; 682 PetscViewer sviewer; 683 PetscViewerFormat format; 684 #if defined(PETSC_HAVE_PLAPACK) 685 Mat_Plapack *lu; 686 #endif 687 688 PetscFunctionBegin; 689 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 690 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 691 if (iascii) { 692 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 693 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 694 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 695 MatInfo info; 696 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 698 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 699 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 700 #if defined(PETSC_HAVE_PLAPACK) 701 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 702 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 703 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 704 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 705 if (mat->factortype){ 706 lu=(Mat_Plapack*)(mat->spptr); 707 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 708 } 709 #else 710 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 711 #endif 712 PetscFunctionReturn(0); 713 } else if (format == PETSC_VIEWER_ASCII_INFO) { 714 PetscFunctionReturn(0); 715 } 716 } else if (isdraw) { 717 PetscDraw draw; 718 PetscBool isnull; 719 720 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 721 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 722 if (isnull) PetscFunctionReturn(0); 723 } 724 725 if (size == 1) { 726 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 727 } else { 728 /* assemble the entire matrix onto first processor. */ 729 Mat A; 730 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 731 PetscInt *cols; 732 PetscScalar *vals; 733 734 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 735 if (!rank) { 736 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 737 } else { 738 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 739 } 740 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 741 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 742 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 743 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 744 745 /* Copy the matrix ... 
This isn't the most efficient means, 746 but it's quick for now */ 747 A->insertmode = INSERT_VALUES; 748 row = mat->rmap->rstart; m = mdn->A->rmap->n; 749 for (i=0; i<m; i++) { 750 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 751 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 752 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 753 row++; 754 } 755 756 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 757 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 758 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 759 if (!rank) { 760 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 761 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 762 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 763 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 764 } 765 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 766 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 767 ierr = MatDestroy(A);CHKERRQ(ierr); 768 } 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatView_MPIDense" 774 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 775 { 776 PetscErrorCode ierr; 777 PetscBool iascii,isbinary,isdraw,issocket; 778 779 PetscFunctionBegin; 780 781 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 782 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 783 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 785 786 if (iascii || issocket || isdraw) { 787 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 788 } else if (isbinary) { 789 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 790 } else { 791 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 792 } 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatGetInfo_MPIDense" 798 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 799 { 800 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 801 Mat mdn = mat->A; 802 PetscErrorCode ierr; 803 PetscReal isend[5],irecv[5]; 804 805 PetscFunctionBegin; 806 info->block_size = 1.0; 807 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 808 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 809 isend[3] = info->memory; isend[4] = info->mallocs; 810 if (flag == MAT_LOCAL) { 811 info->nz_used = isend[0]; 812 info->nz_allocated = isend[1]; 813 info->nz_unneeded = isend[2]; 814 info->memory = isend[3]; 815 info->mallocs = isend[4]; 816 } else if (flag == MAT_GLOBAL_MAX) { 817 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 818 info->nz_used = irecv[0]; 819 info->nz_allocated = irecv[1]; 820 info->nz_unneeded = irecv[2]; 821 info->memory = irecv[3]; 822 info->mallocs = irecv[4]; 823 } else if (flag == MAT_GLOBAL_SUM) { 824 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 825 info->nz_used = irecv[0]; 826 info->nz_allocated = irecv[1]; 827 info->nz_unneeded = irecv[2]; 828 info->memory = irecv[3]; 
829 info->mallocs = irecv[4]; 830 } 831 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 832 info->fill_ratio_needed = 0; 833 info->factor_mallocs = 0; 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatSetOption_MPIDense" 839 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 840 { 841 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 842 PetscErrorCode ierr; 843 844 PetscFunctionBegin; 845 switch (op) { 846 case MAT_NEW_NONZERO_LOCATIONS: 847 case MAT_NEW_NONZERO_LOCATION_ERR: 848 case MAT_NEW_NONZERO_ALLOCATION_ERR: 849 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 850 break; 851 case MAT_ROW_ORIENTED: 852 a->roworiented = flg; 853 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 854 break; 855 case MAT_NEW_DIAGONALS: 856 case MAT_USE_HASH_TABLE: 857 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 858 break; 859 case MAT_IGNORE_OFF_PROC_ENTRIES: 860 a->donotstash = flg; 861 break; 862 case MAT_SYMMETRIC: 863 case MAT_STRUCTURALLY_SYMMETRIC: 864 case MAT_HERMITIAN: 865 case MAT_SYMMETRY_ETERNAL: 866 case MAT_IGNORE_LOWER_TRIANGULAR: 867 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 868 break; 869 default: 870 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatDiagonalScale_MPIDense" 878 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 879 { 880 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 881 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 882 PetscScalar *l,*r,x,*v; 883 PetscErrorCode ierr; 884 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 885 886 PetscFunctionBegin; 887 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 888 if (ll) { 889 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 890 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 891 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 892 for (i=0; i<m; i++) { 893 x = l[i]; 894 v = mat->v + i; 895 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 896 } 897 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 898 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 899 } 900 if (rr) { 901 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 902 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 903 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 906 for (i=0; i<n; i++) { 907 x = r[i]; 908 v = mat->v + i*m; 909 for (j=0; j<m; j++) { (*v++) *= x;} 910 } 911 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 912 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "MatNorm_MPIDense" 919 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 920 { 921 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 922 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 923 PetscErrorCode ierr; 924 PetscInt i,j; 925 PetscReal sum = 0.0; 926 PetscScalar *v = mat->v; 927 928 PetscFunctionBegin; 929 if (mdn->size == 1) { 930 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 931 } else { 932 if (type == NORM_FROBENIUS) { 933 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 934 #if defined(PETSC_USE_COMPLEX) 935 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); /* free both halves of the PetscMalloc2() allocation */
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
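/*
   The PLAPACK interface below follows a copy-in/compute/copy-out pattern:
   MatMPIDenseCopyToPlapack() accumulates the locally owned rows of a PETSc MPIDENSE
   matrix into a PLAPACK object with PLA_API_axpy_matrix_to_global(), the factorization
   or product is then carried out entirely in PLAPACK, and MatMPIDenseCopyFromPlapack()
   brings the result back with PLA_API_axpy_global_to_matrix().  A rough sketch of the
   copy-in step (not a complete routine; assumes lu->A and lu->templ are already set up):

     ierr = PLA_API_begin();CHKERRQ(ierr);
     ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
     ierr = PLA_API_axpy_matrix_to_global(m,M,&one,(void*)array,m,lu->A,rstart,0);CHKERRQ(ierr);
     ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
     ierr = PLA_API_end();CHKERRQ(ierr);
*/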
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1031 { 1032 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1033 PetscErrorCode ierr; 1034 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1035 PetscScalar *array; 1036 PetscReal one = 1.0; 1037 1038 PetscFunctionBegin; 1039 /* Copy A into F->lu->A */ 1040 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1041 ierr = PLA_API_begin();CHKERRQ(ierr); 1042 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1043 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1044 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1045 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1046 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1047 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1048 ierr = PLA_API_end();CHKERRQ(ierr); 1049 lu->rstart = rstart; 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1055 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1056 { 1057 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1058 PetscErrorCode ierr; 1059 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1060 PetscScalar *array; 1061 PetscReal one = 1.0; 1062 1063 PetscFunctionBegin; 1064 /* Copy F into A->lu->A */ 1065 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1066 ierr = PLA_API_begin();CHKERRQ(ierr); 1067 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1068 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1069 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1070 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1071 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1072 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1073 ierr = PLA_API_end();CHKERRQ(ierr); 1074 lu->rstart = rstart; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1080 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1081 { 1082 PetscErrorCode ierr; 1083 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1084 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1085 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1086 PLA_Obj alpha = NULL,beta = NULL; 1087 1088 PetscFunctionBegin; 1089 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1090 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1091 1092 /* 1093 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1094 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1095 */ 1096 1097 /* do the multiply in PLA */ 1098 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1099 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1100 CHKMEMQ; 1101 1102 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1103 CHKMEMQ; 1104 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1105 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1106 1107 /* 1108 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1109 */ 1110 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1116 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1117 { 1118 PetscErrorCode ierr; 1119 PetscInt m=A->rmap->n,n=B->cmap->n; 1120 Mat Cmat; 1121 1122 PetscFunctionBegin; 1123 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n 
%d\n",A->cmap->n,B->rmap->n);
  SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
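/*
   Because of the SETERRQ() above, the parallel dense-dense product is effectively disabled;
   MatMatMult() with a MATMPIAIJ left factor and a MATMPIDENSE right factor is available
   instead through the routines composed in MatCreate_MPIDense().  Illustrative call
   (standard PETSc API; Aaij and Bdense are hypothetical matrices, error checking omitted):

     MatMatMult(Aaij,Bdense,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
*/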
#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into rhs_pla */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factortype == MAT_FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows(v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla);

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv(PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
  } else { /* MAT_FACTOR_CHOLESKY */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv(PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
             PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into Petsc vector x */
  PLA_Obj_local_length(v_pla, &loc_m);
  PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
  PLA_Obj_local_stride(v_pla, &loc_stride);
  /*
    PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
  */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

    /* Create the index sets and scatter context used to map the PLAPACK solution back to PETSc ordering */
    PLA_Obj_local_length(v_pla, &loc_m);
    PLA_Obj_local_stride(v_pla, &loc_stride);
    ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free(&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart    = rstart;
  lu->mstruct   = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE; /* required by -ksp_view */
  PetscFunctionReturn(0);
}
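/*
   A user-level sketch of how the LU path above is reached (illustrative only; standard
   PETSc calls with error checking omitted, and the orderings are ignored by this interface):

     Mat           F;
     MatFactorInfo finfo;
     IS            rperm,cperm;
     MatGetFactor(A,MATSOLVERPLAPACK,MAT_FACTOR_LU,&F);
     MatGetOrdering(A,MATORDERINGNATURAL,&rperm,&cperm);
     MatLUFactorSymbolic(F,A,rperm,cperm,&finfo);
     MatLUFactorNumeric(F,A,&finfo);
     MatSolve(F,b,x);
*/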
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->rstart    = rstart;
  lu->mstruct   = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE; /* required by -ksp_view */
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscBool      issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}
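/*
   The Cholesky path requires the matrix to be declared symmetric before factoring, since
   MatIsSymmetricKnown() is consulted above.  An illustrative calling sequence (standard
   PETSc calls, error checking omitted):

     MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
     MatGetFactor(A,MATSOLVERPLAPACK,MAT_FACTOR_CHOLESKY,&F);
     MatCholeskyFactorSymbolic(F,A,perm,&finfo);
     MatCholeskyFactorNumeric(F,A,&finfo);
     MatSolve(F,b,x);
*/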
/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  lu   = (Mat_Plapack*)F->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERPLAPACK;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_plapack"
PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);

  lu->pla_solved = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->mstruct    = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factortype = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_petsc"
PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
{
#if defined(PETSC_HAVE_PLAPACK)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PLAPACK)
  ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
#else
  SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. 
Install PLAPACK",((PetscObject)A)->type_name); 1435 #endif 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatAXPY_MPIDense" 1441 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1442 { 1443 PetscErrorCode ierr; 1444 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1445 1446 PetscFunctionBegin; 1447 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatConjugate_MPIDense" 1453 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1454 { 1455 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1456 PetscErrorCode ierr; 1457 1458 PetscFunctionBegin; 1459 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatRealPart_MPIDense" 1465 PetscErrorCode MatRealPart_MPIDense(Mat A) 1466 { 1467 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1477 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1478 { 1479 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1480 PetscErrorCode ierr; 1481 1482 PetscFunctionBegin; 1483 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 /* -------------------------------------------------------------------*/ 1488 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1489 MatGetRow_MPIDense, 1490 MatRestoreRow_MPIDense, 1491 MatMult_MPIDense, 1492 /* 4*/ MatMultAdd_MPIDense, 1493 MatMultTranspose_MPIDense, 1494 MatMultTransposeAdd_MPIDense, 1495 0, 1496 0, 1497 0, 1498 /*10*/ 0, 1499 0, 1500 0, 1501 0, 1502 MatTranspose_MPIDense, 1503 /*15*/ MatGetInfo_MPIDense, 1504 MatEqual_MPIDense, 1505 MatGetDiagonal_MPIDense, 1506 MatDiagonalScale_MPIDense, 1507 MatNorm_MPIDense, 1508 /*20*/ MatAssemblyBegin_MPIDense, 1509 MatAssemblyEnd_MPIDense, 1510 MatSetOption_MPIDense, 1511 MatZeroEntries_MPIDense, 1512 /*24*/ MatZeroRows_MPIDense, 1513 0, 1514 0, 1515 0, 1516 0, 1517 /*29*/ MatSetUpPreallocation_MPIDense, 1518 0, 1519 0, 1520 MatGetArray_MPIDense, 1521 MatRestoreArray_MPIDense, 1522 /*34*/ MatDuplicate_MPIDense, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*39*/ MatAXPY_MPIDense, 1528 MatGetSubMatrices_MPIDense, 1529 0, 1530 MatGetValues_MPIDense, 1531 0, 1532 /*44*/ 0, 1533 MatScale_MPIDense, 1534 0, 1535 0, 1536 0, 1537 /*49*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*54*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*59*/ MatGetSubMatrix_MPIDense, 1548 MatDestroy_MPIDense, 1549 MatView_MPIDense, 1550 0, 1551 0, 1552 /*64*/ 0, 1553 0, 1554 0, 1555 0, 1556 0, 1557 /*69*/ 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*74*/ 0, 1563 0, 1564 0, 1565 0, 1566 0, 1567 /*79*/ 0, 1568 0, 1569 0, 1570 0, 1571 /*83*/ MatLoad_MPIDense, 1572 0, 1573 0, 1574 0, 1575 0, 1576 0, 1577 /*89*/ 1578 #if defined(PETSC_HAVE_PLAPACK) 1579 MatMatMult_MPIDense_MPIDense, 1580 MatMatMultSymbolic_MPIDense_MPIDense, 1581 MatMatMultNumeric_MPIDense_MPIDense, 1582 #else 1583 0, 1584 0, 1585 0, 1586 #endif 1587 0, 1588 0, 1589 /*94*/ 0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /*99*/ 0, 1595 0, 1596 0, 1597 MatConjugate_MPIDense, 1598 0, 1599 /*104*/0, 1600 MatRealPart_MPIDense, 1601 MatImaginaryPart_MPIDense, 1602 0, 1603 0, 1604 /*109*/0, 1605 0, 1606 0, 1607 0, 1608 0, 1609 /*114*/0, 1610 0, 1611 0, 1612 0, 1613 0, 1614 /*119*/0, 1615 0, 1616 0, 
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*24*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*34*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIDense,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*44*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       0,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
/*83*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*89*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       MatConjugate_MPIDense,
       0,
/*104*/0,
       MatRealPart_MPIDense,
       MatImaginaryPart_MPIDense,
       0,
       0,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

   Run ./configure with the option --download-plapack to install PETSc with this capability.

   Options Database Keys:
+  -mat_plapack_nprows <n> - number of rows in processor partition
.  -mat_plapack_npcols <n> - number of columns in processor partition
.  -mat_plapack_nb <n> - block size of template vector
.  -mat_plapack_nb_alg <n> - algorithmic block size
-  -mat_plapack_ckerror <n> - error checking flag

   Level: intermediate

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetMatSolverPackage(), MatSolverPackage

M*/
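/*
   Illustrative sketch only (not part of the original source): with PETSc configured via
   --download-plapack, this factorization can be requested from the options database, e.g.

     -pc_type lu -pc_factor_mat_solver_package plapack -mat_plapack_nprows 2 -mat_plapack_npcols 2

   or programmatically through the factor interface; A is an assembled MATMPIDENSE matrix,
   b and x are conforming vectors, and all names are placeholders:

     Mat           F;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERPLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
*/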
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  /* build cache for off-processor entries generated during MatSetValues() */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
                                           "MatGetFactor_mpidense_petsc",
                                           MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
                                           "MatGetFactor_mpidense_plapack",
                                           MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
.  -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
         to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
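/*
   Typical call sequence (illustrative sketch only; comm, M and N are placeholders).
   Passing PETSC_NULL for the data pointer lets PETSc allocate the local dense storage:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/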
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
         for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
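/*
   Usage sketch (illustrative only): create a 100 x 100 dense matrix, letting PETSc pick
   the local row split and manage the storage, then fill and assemble it; i, j and value
   are placeholders:

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValue(A,i,j,value,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/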
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
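/*
   Worked example of the default row partition used above (comment added for clarity):
   with M = 10 global rows on size = 4 processes, m = M/size + ((M % size) > rank) gives
   local row counts 3,3,2,2 on ranks 0..3; the prefix sum over the allgathered counts
   then yields rowners = {0,3,6,8,10}, so rank r owns rows rowners[r] through rowners[r+1]-1.
*/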
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
  else m = PetscMPIIntCast(newmat->rmap->n);
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
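/*
   Illustrative sketch only (not part of the original source): loading an MPIDENSE matrix
   from a PETSc binary file previously written with MatView() on a binary viewer.  The file
   name "matrix.dat" is a placeholder, and the exact viewer-destroy calling convention
   follows the PETSc version this source belongs to:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/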
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not fit naturally with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator, because there is global state that is initialized once and used for all matrices. In addition, if
  PLAPACK is initialized (that is, the first matrices are created) on subcommunicators of MPI_COMM_WORLD, these
  subcommunicators cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscSysInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
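/*
   Added note (assumption, not from the original source): by default the code above sets up
   a 1 x size process mesh for PLAPACK.  A different mesh can be requested at run time, e.g.
   on 6 processes

     -mat_plapack_nprows 2 -mat_plapack_npcols 3

   presumably with nprows*npcols matching the size of the communicator handed to
   PLA_Comm_1D_to_2D().
*/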