1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #if defined(PETSC_HAVE_PLAPACK) 9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 10 static MPI_Comm Plapack_comm_2d; 11 EXTERN_C_BEGIN 12 #include <PLA.h> 13 EXTERN_C_END 14 15 typedef struct { 16 PLA_Obj A,pivots; 17 PLA_Template templ; 18 MPI_Datatype datatype; 19 PetscInt nb,rstart; 20 VecScatter ctx; 21 IS is_pla,is_petsc; 22 PetscBool pla_solved; 23 MatStructure mstruct; 24 } Mat_Plapack; 25 #endif 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatDenseGetLocalMatrix" 29 /*@ 30 31 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 32 matrix that represents the operator. For sequential matrices it returns itself. 33 34 Input Parameter: 35 . A - the Seq or MPI dense matrix 36 37 Output Parameter: 38 . B - the inner matrix 39 40 Level: intermediate 41 42 @*/ 43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscBool flg; 48 49 PetscFunctionBegin; 50 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 51 if (flg) { 52 *B = mat->A; 53 } else { 54 *B = A; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatGetRow_MPIDense" 61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 62 { 63 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 66 67 PetscFunctionBegin; 68 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 69 lrow = row - rstart; 70 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatRestoreRow_MPIDense" 76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 82 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 83 PetscFunctionReturn(0); 84 } 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 89 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 90 { 91 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 92 PetscErrorCode ierr; 93 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 94 PetscScalar *array; 95 MPI_Comm comm; 96 Mat B; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 102 if (!B) { 103 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 104 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 105 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 106 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 107 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 108 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 109 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 110 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 111 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 113 *a = B; 114 ierr = MatDestroy(&B);CHKERRQ(ierr); 115 } else { 116 
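    /* A "DiagonalBlock" matrix was composed on A by an earlier call (branch above); hand back that
       cached sequential matrix. Note this is a borrowed reference: the object is kept alive by the
       composition on A, mirroring the MatDestroy() that drops the local reference in the first branch. */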
*a = B; 117 } 118 PetscFunctionReturn(0); 119 } 120 EXTERN_C_END 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatSetValues_MPIDense" 124 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 125 { 126 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 127 PetscErrorCode ierr; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 PetscBool roworiented = A->roworiented; 130 131 PetscFunctionBegin; 132 if (v) PetscValidScalarPointer(v,6); 133 for (i=0; i<m; i++) { 134 if (idxm[i] < 0) continue; 135 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 136 if (idxm[i] >= rstart && idxm[i] < rend) { 137 row = idxm[i] - rstart; 138 if (roworiented) { 139 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 140 } else { 141 for (j=0; j<n; j++) { 142 if (idxn[j] < 0) continue; 143 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 144 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 145 } 146 } 147 } else { 148 if (!A->donotstash) { 149 if (roworiented) { 150 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 151 } else { 152 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 153 } 154 } 155 } 156 } 157 PetscFunctionReturn(0); 158 } 159 160 #undef __FUNCT__ 161 #define __FUNCT__ "MatGetValues_MPIDense" 162 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 163 { 164 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 165 PetscErrorCode ierr; 166 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 167 168 PetscFunctionBegin; 169 for (i=0; i<m; i++) { 170 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 171 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 172 if (idxm[i] >= rstart && idxm[i] < rend) { 173 row = idxm[i] - rstart; 174 for (j=0; j<n; j++) { 175 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 176 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 177 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 178 } 179 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 180 } 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatGetArray_MPIDense" 186 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 198 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 199 { 200 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 201 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 202 PetscErrorCode ierr; 203 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 204 const PetscInt *irow,*icol; 205 PetscScalar *av,*bv,*v = lmat->v; 206 Mat newmat; 207 IS iscol_local; 208 209 PetscFunctionBegin; 210 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 211 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 212 
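  /* Each process of a MATMPIDENSE matrix stores all N global columns of its local rows, so the
     requested column indices are gathered onto every process (iscol_local, from ISAllGather() above)
     and the submatrix entries can be read directly out of the local dense block below. */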
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space; iscol_local was created by ISAllGather() above and its indices were
     obtained above, so it (not iscol) must be restored and destroyed here */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /*
wait on receives */ 302 while (1) { 303 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 304 if (!flg) break; 305 306 for (i=0; i<n;) { 307 /* Now identify the consecutive vals belonging to the same row */ 308 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 309 if (j < n) ncols = j-i; 310 else ncols = n-i; 311 /* Now assemble all these values with a single function call */ 312 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 313 i = j; 314 } 315 } 316 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 317 318 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 320 321 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 322 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 323 } 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroEntries_MPIDense" 329 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 330 { 331 PetscErrorCode ierr; 332 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 333 334 PetscFunctionBegin; 335 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 /* the code does not do the diagonal entries correctly unless the 340 matrix is square and the column and row owerships are identical. 341 This is a BUG. The only way to fix it seems to be to access 342 mdn->A and mdn->B directly and not through the MatZeroRows() 343 routine. 344 */ 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatZeroRows_MPIDense" 347 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 348 { 349 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 350 PetscErrorCode ierr; 351 PetscInt i,*owners = A->rmap->range; 352 PetscInt *nprocs,j,idx,nsends; 353 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 354 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 355 PetscInt *lens,*lrows,*values; 356 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 357 MPI_Comm comm = ((PetscObject)A)->comm; 358 MPI_Request *send_waits,*recv_waits; 359 MPI_Status recv_status,*send_status; 360 PetscBool found; 361 const PetscScalar *xx; 362 PetscScalar *bb; 363 364 PetscFunctionBegin; 365 /* first count number of contributors to each processor */ 366 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 367 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 368 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 369 for (i=0; i<N; i++) { 370 idx = rows[i]; 371 found = PETSC_FALSE; 372 for (j=0; j<size; j++) { 373 if (idx >= owners[j] && idx < owners[j+1]) { 374 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 375 } 376 } 377 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 378 } 379 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 380 381 /* inform other processors of number of messages and max length*/ 382 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 383 384 /* post receives: */ 385 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 386 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 387 for (i=0; i<nrecvs; i++) { 388 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 389 } 390 391 /* do sends: 392 1) starts[i] gives the starting index in svalues for stuff going to 393 the ith processor 394 */ 395 ierr = 
PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 396 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 397 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 398 starts[0] = 0; 399 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 400 for (i=0; i<N; i++) { 401 svalues[starts[owner[i]]++] = rows[i]; 402 } 403 404 starts[0] = 0; 405 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 406 count = 0; 407 for (i=0; i<size; i++) { 408 if (nprocs[2*i+1]) { 409 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 410 } 411 } 412 ierr = PetscFree(starts);CHKERRQ(ierr); 413 414 base = owners[rank]; 415 416 /* wait on receives */ 417 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 418 count = nrecvs; 419 slen = 0; 420 while (count) { 421 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 422 /* unpack receives into our local space */ 423 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 424 source[imdex] = recv_status.MPI_SOURCE; 425 lens[imdex] = n; 426 slen += n; 427 count--; 428 } 429 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 430 431 /* move the data into the send scatter */ 432 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 433 count = 0; 434 for (i=0; i<nrecvs; i++) { 435 values = rvalues + i*nmax; 436 for (j=0; j<lens[i]; j++) { 437 lrows[count++] = values[j] - base; 438 } 439 } 440 ierr = PetscFree(rvalues);CHKERRQ(ierr); 441 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 442 ierr = PetscFree(owner);CHKERRQ(ierr); 443 ierr = PetscFree(nprocs);CHKERRQ(ierr); 444 445 /* fix right hand side if needed */ 446 if (x && b) { 447 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 448 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 449 for (i=0; i<slen; i++) { 450 bb[lrows[i]] = diag*xx[lrows[i]]; 451 } 452 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 453 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 454 } 455 456 /* actually zap the local rows */ 457 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 458 ierr = PetscFree(lrows);CHKERRQ(ierr); 459 460 /* wait on sends */ 461 if (nsends) { 462 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 463 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 464 ierr = PetscFree(send_status);CHKERRQ(ierr); 465 } 466 ierr = PetscFree(send_waits);CHKERRQ(ierr); 467 ierr = PetscFree(svalues);CHKERRQ(ierr); 468 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMult_MPIDense" 474 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 475 { 476 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatMultAdd_MPIDense" 488 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 489 { 490 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 491 PetscErrorCode ierr; 492 493 PetscFunctionBegin; 494 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 495 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = 
MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTranspose_MPIDense" 502 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 PetscScalar zero = 0.0; 507 508 PetscFunctionBegin; 509 ierr = VecSet(yy,zero);CHKERRQ(ierr); 510 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 511 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 513 PetscFunctionReturn(0); 514 } 515 516 #undef __FUNCT__ 517 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 518 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 519 { 520 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 521 PetscErrorCode ierr; 522 523 PetscFunctionBegin; 524 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 525 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 526 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 527 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatGetDiagonal_MPIDense" 533 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 534 { 535 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 536 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 537 PetscErrorCode ierr; 538 PetscInt len,i,n,m = A->rmap->n,radd; 539 PetscScalar *x,zero = 0.0; 540 541 PetscFunctionBegin; 542 ierr = VecSet(v,zero);CHKERRQ(ierr); 543 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 544 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 545 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 546 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 547 radd = A->rmap->rstart*m; 548 for (i=0; i<len; i++) { 549 x[i] = aloc->v[radd + i*m + i]; 550 } 551 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatDestroy_MPIDense" 557 PetscErrorCode MatDestroy_MPIDense(Mat mat) 558 { 559 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 560 PetscErrorCode ierr; 561 #if defined(PETSC_HAVE_PLAPACK) 562 Mat_Plapack *lu=(Mat_Plapack*)mat->spptr; 563 #endif 564 565 PetscFunctionBegin; 566 567 #if defined(PETSC_USE_LOG) 568 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 569 #endif 570 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 571 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 572 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 574 #if defined(PETSC_HAVE_PLAPACK) 575 if (lu) { 576 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 577 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 578 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 579 ierr = ISDestroy(&lu->is_pla);CHKERRQ(ierr); 580 ierr = ISDestroy(&lu->is_petsc);CHKERRQ(ierr); 581 ierr = VecScatterDestroy(&lu->ctx);CHKERRQ(ierr); 582 } 583 ierr = PetscFree(mat->spptr);CHKERRQ(ierr); 584 #endif 585 586 ierr = PetscFree(mat->data);CHKERRQ(ierr); 587 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 590 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatView_MPIDense_Binary" 598 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 599 { 600 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 601 PetscErrorCode ierr; 602 PetscViewerFormat format; 603 int fd; 604 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 605 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 606 PetscScalar *work,*v,*vv; 607 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 608 MPI_Status status; 609 610 PetscFunctionBegin; 611 if (mdn->size == 1) { 612 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 613 } else { 614 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 615 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 616 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 617 618 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 619 if (format == PETSC_VIEWER_NATIVE) { 620 621 if (!rank) { 622 /* store the matrix as a dense matrix */ 623 header[0] = MAT_FILE_CLASSID; 624 header[1] = mat->rmap->N; 625 header[2] = N; 626 header[3] = MATRIX_BINARY_FORMAT_DENSE; 627 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 628 629 /* get largest work array needed for transposing array */ 630 mmax = mat->rmap->n; 631 for (i=1; i<size; i++) { 632 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 633 } 634 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 635 636 /* write out local array, by rows */ 637 m = mat->rmap->n; 638 v = a->v; 639 for (j=0; j<N; j++) { 640 for (i=0; i<m; i++) { 641 work[j + i*N] = *v++; 642 } 643 } 644 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 645 /* get largest work array to receive messages from other processes, excludes process zero */ 646 mmax = 0; 647 for (i=1; i<size; i++) { 648 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 649 } 650 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 651 for(k = 1; k < size; k++) { 652 v = vv; 653 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 654 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 655 656 for(j = 0; j < N; j++) { 657 for(i = 0; i < m; i++) { 658 work[j + i*N] = *v++; 659 } 660 } 661 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 662 } 663 ierr = PetscFree(work);CHKERRQ(ierr); 664 ierr = PetscFree(vv);CHKERRQ(ierr); 665 } else { 666 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 670 } 671 } 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 677 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 678 { 679 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 680 PetscErrorCode ierr; 681 PetscMPIInt size = mdn->size,rank = mdn->rank; 682 const PetscViewerType vtype; 683 PetscBool 
iascii,isdraw; 684 PetscViewer sviewer; 685 PetscViewerFormat format; 686 #if defined(PETSC_HAVE_PLAPACK) 687 Mat_Plapack *lu; 688 #endif 689 690 PetscFunctionBegin; 691 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 692 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 693 if (iascii) { 694 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 695 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 696 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 697 MatInfo info; 698 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 700 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 701 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 702 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 703 #if defined(PETSC_HAVE_PLAPACK) 704 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 705 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 706 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 707 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 708 if (mat->factortype){ 709 lu=(Mat_Plapack*)(mat->spptr); 710 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 711 } 712 #else 713 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 714 #endif 715 PetscFunctionReturn(0); 716 } else if (format == PETSC_VIEWER_ASCII_INFO) { 717 PetscFunctionReturn(0); 718 } 719 } else if (isdraw) { 720 PetscDraw draw; 721 PetscBool isnull; 722 723 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 724 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 725 if (isnull) PetscFunctionReturn(0); 726 } 727 728 if (size == 1) { 729 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 730 } else { 731 /* assemble the entire matrix onto first processor. */ 732 Mat A; 733 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 734 PetscInt *cols; 735 PetscScalar *vals; 736 737 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 738 if (!rank) { 739 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 740 } else { 741 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 742 } 743 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 744 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 745 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 746 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 747 748 /* Copy the matrix ... 
This isn't the most efficient means, 749 but it's quick for now */ 750 A->insertmode = INSERT_VALUES; 751 row = mat->rmap->rstart; m = mdn->A->rmap->n; 752 for (i=0; i<m; i++) { 753 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 754 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 755 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 756 row++; 757 } 758 759 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 760 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 761 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 762 if (!rank) { 763 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 764 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 765 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 766 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 767 } 768 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 769 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 770 ierr = MatDestroy(&A);CHKERRQ(ierr); 771 } 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "MatView_MPIDense" 777 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 778 { 779 PetscErrorCode ierr; 780 PetscBool iascii,isbinary,isdraw,issocket; 781 782 PetscFunctionBegin; 783 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 787 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 788 789 if (iascii || issocket || isdraw) { 790 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 791 } else if (isbinary) { 792 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 793 } else { 794 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNCT__ 800 #define __FUNCT__ "MatGetInfo_MPIDense" 801 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 802 { 803 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 804 Mat mdn = mat->A; 805 PetscErrorCode ierr; 806 PetscReal isend[5],irecv[5]; 807 808 PetscFunctionBegin; 809 info->block_size = 1.0; 810 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 811 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 812 isend[3] = info->memory; isend[4] = info->mallocs; 813 if (flag == MAT_LOCAL) { 814 info->nz_used = isend[0]; 815 info->nz_allocated = isend[1]; 816 info->nz_unneeded = isend[2]; 817 info->memory = isend[3]; 818 info->mallocs = isend[4]; 819 } else if (flag == MAT_GLOBAL_MAX) { 820 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 821 info->nz_used = irecv[0]; 822 info->nz_allocated = irecv[1]; 823 info->nz_unneeded = irecv[2]; 824 info->memory = irecv[3]; 825 info->mallocs = irecv[4]; 826 } else if (flag == MAT_GLOBAL_SUM) { 827 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 828 info->nz_used = irecv[0]; 829 info->nz_allocated = irecv[1]; 830 info->nz_unneeded = irecv[2]; 831 info->memory = 
irecv[3]; 832 info->mallocs = irecv[4]; 833 } 834 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 835 info->fill_ratio_needed = 0; 836 info->factor_mallocs = 0; 837 PetscFunctionReturn(0); 838 } 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatSetOption_MPIDense" 842 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 843 { 844 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 845 PetscErrorCode ierr; 846 847 PetscFunctionBegin; 848 switch (op) { 849 case MAT_NEW_NONZERO_LOCATIONS: 850 case MAT_NEW_NONZERO_LOCATION_ERR: 851 case MAT_NEW_NONZERO_ALLOCATION_ERR: 852 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 853 break; 854 case MAT_ROW_ORIENTED: 855 a->roworiented = flg; 856 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 857 break; 858 case MAT_NEW_DIAGONALS: 859 case MAT_USE_HASH_TABLE: 860 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 861 break; 862 case MAT_IGNORE_OFF_PROC_ENTRIES: 863 a->donotstash = flg; 864 break; 865 case MAT_SYMMETRIC: 866 case MAT_STRUCTURALLY_SYMMETRIC: 867 case MAT_HERMITIAN: 868 case MAT_SYMMETRY_ETERNAL: 869 case MAT_IGNORE_LOWER_TRIANGULAR: 870 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 871 break; 872 default: 873 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 874 } 875 PetscFunctionReturn(0); 876 } 877 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "MatDiagonalScale_MPIDense" 881 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 882 { 883 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 884 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 885 PetscScalar *l,*r,x,*v; 886 PetscErrorCode ierr; 887 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 888 889 PetscFunctionBegin; 890 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 891 if (ll) { 892 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 893 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 894 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 895 for (i=0; i<m; i++) { 896 x = l[i]; 897 v = mat->v + i; 898 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 899 } 900 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 901 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 902 } 903 if (rr) { 904 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 905 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 906 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 909 for (i=0; i<n; i++) { 910 x = r[i]; 911 v = mat->v + i*m; 912 for (j=0; j<m; j++) { (*v++) *= x;} 913 } 914 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 915 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 916 } 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatNorm_MPIDense" 922 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 923 { 924 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 925 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 926 PetscErrorCode ierr; 927 PetscInt i,j; 928 PetscReal sum = 0.0; 929 PetscScalar *v = mat->v; 930 931 PetscFunctionBegin; 932 if (mdn->size == 1) { 933 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 934 } else { 935 if (type == NORM_FROBENIUS) { 936 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 937 #if defined(PETSC_USE_COMPLEX) 938 sum 
+= PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); /* free both work arrays */
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
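/*
   MatMPIDenseCopyToPlapack - copies the PETSc-distributed entries of A into the PLAPACK
   object stored in F->spptr (lu->A). The body below follows PLAPACK's API protocol:
   bracket the update with PLA_API_begin()/PLA_API_end(), open the target object with
   PLA_Obj_API_open(), and contribute this process's local m x M block with
   PLA_API_axpy_matrix_to_global() at row offset rstart (alpha = 1.0, so the zeroed
   target simply receives a copy). A rough sketch of the call order used here:

       PLA_Obj_set_to_zero(lu->A);
       PLA_API_begin();  PLA_Obj_API_open(lu->A);
       PLA_API_axpy_matrix_to_global(m,M,&one,(void*)array,m,lu->A,rstart,0);
       PLA_Obj_API_close(lu->A);  PLA_API_end();
*/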
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1034 { 1035 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1036 PetscErrorCode ierr; 1037 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1038 PetscScalar *array; 1039 PetscReal one = 1.0; 1040 1041 PetscFunctionBegin; 1042 /* Copy A into F->lu->A */ 1043 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1044 ierr = PLA_API_begin();CHKERRQ(ierr); 1045 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1046 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1047 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1048 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1049 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1050 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1051 ierr = PLA_API_end();CHKERRQ(ierr); 1052 lu->rstart = rstart; 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1058 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1059 { 1060 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1061 PetscErrorCode ierr; 1062 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1063 PetscScalar *array; 1064 PetscReal one = 1.0; 1065 1066 PetscFunctionBegin; 1067 /* Copy F into A->lu->A */ 1068 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1069 ierr = PLA_API_begin();CHKERRQ(ierr); 1070 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1071 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1072 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1073 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1074 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1075 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1076 ierr = PLA_API_end();CHKERRQ(ierr); 1077 lu->rstart = rstart; 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1083 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1084 { 1085 PetscErrorCode ierr; 1086 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1087 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1088 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1089 PLA_Obj alpha = NULL,beta = NULL; 1090 1091 PetscFunctionBegin; 1092 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1093 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1094 1095 /* 1096 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1097 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1098 */ 1099 1100 /* do the multiply in PLA */ 1101 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1102 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1103 CHKMEMQ; 1104 1105 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1106 CHKMEMQ; 1107 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1108 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1109 1110 /* 1111 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1112 */ 1113 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1119 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1120 { 1121 PetscErrorCode ierr; 1122 PetscInt m=A->rmap->n,n=B->cmap->n; 1123 Mat Cmat; 1124 1125 PetscFunctionBegin; 1126 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n 
%d\n",A->cmap->n,B->rmap->n); 1127 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1128 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1129 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1130 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1131 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1132 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1133 1134 *C = Cmat; 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1140 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1141 { 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 if (scall == MAT_INITIAL_MATRIX){ 1146 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1147 } 1148 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatSolve_MPIDense" 1154 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1155 { 1156 MPI_Comm comm = ((PetscObject)A)->comm; 1157 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1158 PetscErrorCode ierr; 1159 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1160 PetscScalar *array; 1161 PetscReal one = 1.0; 1162 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1163 PLA_Obj v_pla = NULL; 1164 PetscScalar *loc_buf; 1165 Vec loc_x; 1166 1167 PetscFunctionBegin; 1168 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1169 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1170 1171 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1172 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1173 PLA_Obj_set_to_zero(v_pla); 1174 1175 /* Copy b into rhs_pla */ 1176 PLA_API_begin(); 1177 PLA_Obj_API_open(v_pla); 1178 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1179 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1180 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1181 PLA_Obj_API_close(v_pla); 1182 PLA_API_end(); 1183 1184 if (A->factortype == MAT_FACTOR_LU){ 1185 /* Apply the permutations to the right hand sides */ 1186 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1187 1188 /* Solve L y = b, overwriting b with y */ 1189 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1190 1191 /* Solve U x = y (=b), overwriting b with x */ 1192 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1193 } else { /* MAT_FACTOR_CHOLESKY */ 1194 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1195 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? 
PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1196 PLA_NONUNIT_DIAG,lu->A,v_pla); 1197 } 1198 1199 /* Copy PLAPACK x into Petsc vector x */ 1200 PLA_Obj_local_length(v_pla, &loc_m); 1201 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1202 PLA_Obj_local_stride(v_pla, &loc_stride); 1203 /* 1204 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1205 */ 1206 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1207 if (!lu->pla_solved){ 1208 1209 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1210 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1211 1212 /* Create IS and cts for VecScatterring */ 1213 PLA_Obj_local_length(v_pla, &loc_m); 1214 PLA_Obj_local_stride(v_pla, &loc_stride); 1215 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1216 1217 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1218 for (i=0; i<loc_m; i+=lu->nb){ 1219 j = 0; 1220 while (j < lu->nb && i+j < loc_m){ 1221 idx_petsc[i+j] = rstart + j; j++; 1222 } 1223 rstart += size*lu->nb; 1224 } 1225 1226 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1227 1228 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr); 1229 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr); 1230 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1231 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1232 } 1233 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 1236 /* Free data */ 1237 ierr = VecDestroy(&loc_x);CHKERRQ(ierr); 1238 PLA_Obj_free(&v_pla); 1239 1240 lu->pla_solved = PETSC_TRUE; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1246 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1247 { 1248 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1249 PetscErrorCode ierr; 1250 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1251 PetscInt info_pla=0; 1252 PetscScalar *array,one = 1.0; 1253 1254 PetscFunctionBegin; 1255 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1256 PLA_Obj_free(&lu->A); 1257 PLA_Obj_free (&lu->pivots); 1258 } 1259 /* Create PLAPACK matrix object */ 1260 lu->A = NULL; lu->pivots = NULL; 1261 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1262 PLA_Obj_set_to_zero(lu->A); 1263 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1264 1265 /* Copy A into lu->A */ 1266 PLA_API_begin(); 1267 PLA_Obj_API_open(lu->A); 1268 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1269 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1270 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1271 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1272 PLA_Obj_API_close(lu->A); 1273 PLA_API_end(); 1274 1275 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1276 info_pla = PLA_LU(lu->A,lu->pivots); 1277 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1278 1279 lu->rstart = rstart; 1280 lu->mstruct = SAME_NONZERO_PATTERN; 1281 F->ops->solve = MatSolve_MPIDense; 1282 F->assembled = PETSC_TRUE; /* 
required by -ksp_view */ 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1288 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1289 { 1290 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1291 PetscErrorCode ierr; 1292 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1293 PetscInt info_pla=0; 1294 PetscScalar *array,one = 1.0; 1295 1296 PetscFunctionBegin; 1297 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1298 PLA_Obj_free(&lu->A); 1299 } 1300 /* Create PLAPACK matrix object */ 1301 lu->A = NULL; 1302 lu->pivots = NULL; 1303 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1304 1305 /* Copy A into lu->A */ 1306 PLA_API_begin(); 1307 PLA_Obj_API_open(lu->A); 1308 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1309 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1310 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1311 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1312 PLA_Obj_API_close(lu->A); 1313 PLA_API_end(); 1314 1315 /* Factor P A -> Chol */ 1316 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1317 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1318 1319 lu->rstart = rstart; 1320 lu->mstruct = SAME_NONZERO_PATTERN; 1321 F->ops->solve = MatSolve_MPIDense; 1322 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* Note the Petsc perm permutation is ignored */ 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1329 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1330 { 1331 PetscErrorCode ierr; 1332 PetscBool issymmetric,set; 1333 1334 PetscFunctionBegin; 1335 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1336 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1337 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /* Note the Petsc r and c permutations are ignored */ 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1344 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1345 { 1346 PetscErrorCode ierr; 1347 PetscInt M = A->rmap->N; 1348 Mat_Plapack *lu; 1349 1350 PetscFunctionBegin; 1351 lu = (Mat_Plapack*)F->spptr; 1352 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1353 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1354 PetscFunctionReturn(0); 1355 } 1356 1357 EXTERN_C_BEGIN 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1360 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1361 { 1362 PetscFunctionBegin; 1363 *type = MATSOLVERPLAPACK; 1364 PetscFunctionReturn(0); 1365 } 1366 EXTERN_C_END 1367 1368 EXTERN_C_BEGIN 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1371 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1372 { 1373 PetscErrorCode ierr; 1374 Mat_Plapack *lu; 1375 PetscMPIInt size; 1376 PetscInt M=A->rmap->N; 1377 1378 PetscFunctionBegin; 1379 /* Create the factorization matrix */ 1380 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1381 ierr = 
MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1382 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1383 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1384 (*F)->spptr = (void*)lu; 1385 1386 /* Set default Plapack parameters */ 1387 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1388 lu->nb = M/size; 1389 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1390 1391 /* Set runtime options */ 1392 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1393 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1394 PetscOptionsEnd(); 1395 1396 /* Create object distribution template */ 1397 lu->templ = NULL; 1398 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1399 1400 /* Set the datatype */ 1401 #if defined(PETSC_USE_COMPLEX) 1402 lu->datatype = MPI_DOUBLE_COMPLEX; 1403 #else 1404 lu->datatype = MPI_DOUBLE; 1405 #endif 1406 1407 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1408 1409 1410 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1411 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1412 1413 if (ftype == MAT_FACTOR_LU) { 1414 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1415 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1416 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1417 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1418 (*F)->factortype = ftype; 1419 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 EXTERN_C_END 1423 #endif 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1427 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1428 { 1429 #if defined(PETSC_HAVE_PLAPACK) 1430 PetscErrorCode ierr; 1431 #endif 1432 1433 PetscFunctionBegin; 1434 #if defined(PETSC_HAVE_PLAPACK) 1435 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1436 #else 1437 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. 
Install PLAPACK",((PetscObject)A)->type_name); 1438 #endif 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatAXPY_MPIDense" 1444 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1445 { 1446 PetscErrorCode ierr; 1447 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1448 1449 PetscFunctionBegin; 1450 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatConjugate_MPIDense" 1456 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1457 { 1458 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1459 PetscErrorCode ierr; 1460 1461 PetscFunctionBegin; 1462 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatRealPart_MPIDense" 1468 PetscErrorCode MatRealPart_MPIDense(Mat A) 1469 { 1470 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1480 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1481 { 1482 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1483 PetscErrorCode ierr; 1484 1485 PetscFunctionBegin; 1486 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1487 PetscFunctionReturn(0); 1488 } 1489 1490 /* -------------------------------------------------------------------*/ 1491 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1492 MatGetRow_MPIDense, 1493 MatRestoreRow_MPIDense, 1494 MatMult_MPIDense, 1495 /* 4*/ MatMultAdd_MPIDense, 1496 MatMultTranspose_MPIDense, 1497 MatMultTransposeAdd_MPIDense, 1498 0, 1499 0, 1500 0, 1501 /*10*/ 0, 1502 0, 1503 0, 1504 0, 1505 MatTranspose_MPIDense, 1506 /*15*/ MatGetInfo_MPIDense, 1507 MatEqual_MPIDense, 1508 MatGetDiagonal_MPIDense, 1509 MatDiagonalScale_MPIDense, 1510 MatNorm_MPIDense, 1511 /*20*/ MatAssemblyBegin_MPIDense, 1512 MatAssemblyEnd_MPIDense, 1513 MatSetOption_MPIDense, 1514 MatZeroEntries_MPIDense, 1515 /*24*/ MatZeroRows_MPIDense, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*29*/ MatSetUpPreallocation_MPIDense, 1521 0, 1522 0, 1523 MatGetArray_MPIDense, 1524 MatRestoreArray_MPIDense, 1525 /*34*/ MatDuplicate_MPIDense, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*39*/ MatAXPY_MPIDense, 1531 MatGetSubMatrices_MPIDense, 1532 0, 1533 MatGetValues_MPIDense, 1534 0, 1535 /*44*/ 0, 1536 MatScale_MPIDense, 1537 0, 1538 0, 1539 0, 1540 /*49*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*54*/ 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*59*/ MatGetSubMatrix_MPIDense, 1551 MatDestroy_MPIDense, 1552 MatView_MPIDense, 1553 0, 1554 0, 1555 /*64*/ 0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*69*/ 0, 1561 0, 1562 0, 1563 0, 1564 0, 1565 /*74*/ 0, 1566 0, 1567 0, 1568 0, 1569 0, 1570 /*79*/ 0, 1571 0, 1572 0, 1573 0, 1574 /*83*/ MatLoad_MPIDense, 1575 0, 1576 0, 1577 0, 1578 0, 1579 0, 1580 /*89*/ 1581 #if defined(PETSC_HAVE_PLAPACK) 1582 MatMatMult_MPIDense_MPIDense, 1583 MatMatMultSymbolic_MPIDense_MPIDense, 1584 MatMatMultNumeric_MPIDense_MPIDense, 1585 #else 1586 0, 1587 0, 1588 0, 1589 #endif 1590 0, 1591 0, 1592 /*94*/ 0, 1593 0, 1594 0, 1595 0, 1596 0, 1597 /*99*/ 0, 1598 0, 1599 0, 1600 MatConjugate_MPIDense, 1601 0, 1602 /*104*/0, 1603 MatRealPart_MPIDense, 1604 MatImaginaryPart_MPIDense, 1605 0, 1606 0, 1607 /*109*/0, 1608 0, 1609 0, 1610 0, 1611 0, 1612 /*114*/0, 1613 0, 1614 0, 1615 0, 1616 0, 1617 /*119*/0, 1618 0, 1619 0, 
       0
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

   Run ./configure with the option --download-plapack

   Options Database Keys:
+  -mat_plapack_nprows <n> - number of rows in processor partition
.  -mat_plapack_npcols <n> - number of columns in processor partition
.  -mat_plapack_nb <n> - block size of template vector
.  -mat_plapack_nb_alg <n> - algorithmic block size
-  -mat_plapack_ckerror <n> - error checking flag

   Level: intermediate

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetMatSolverPackage(), MatSolverPackage

M*/
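
/*
   Illustrative usage sketch (editorial addition, not part of the original source): one way to request
   the PLAPACK LU factorization of a MATMPIDENSE matrix through the factor interface.  The variables
   A, b, and x are assumed to be created by the caller; passing PETSC_NULL for the ordering index sets
   follows the usual convention for dense factorizations.

      Mat           F;
      MatFactorInfo info;
      ierr = MatGetFactor(A,MATSOLVERPLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
      ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
      ierr = MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);CHKERRQ(ierr);
      ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
      ierr = MatSolve(F,b,x);CHKERRQ(ierr);
      ierr = MatDestroy(&F);CHKERRQ(ierr);

   When solving through KSP/PC, the same choice can typically be made at run time with
   -pc_type lu -pc_factor_mat_solver_package plapack.
*/
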
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  /* build cache for off-processor array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
                                           "MatGetFactor_mpidense_petsc",
                                           MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
                                           "MatGetFactor_mpidense_plapack",
                                           MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
.  -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
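
/*
   Illustrative usage sketch (editorial addition): the usual call sequence when the matrix type is
   chosen at run time.  Calling both the MPI and Seq preallocation routines is safe because each one
   is a no-op for the other type.  The variables A, m, n, M, and N are assumed to be provided by the
   caller.

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/
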
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
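
/*
   Illustrative usage sketch (editorial addition): creating a parallel dense matrix with PETSc-managed
   storage and global dimensions M by N, leaving the local sizes to PETSC_DECIDE.  M and N are assumed
   to be defined by the caller.

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ... set entries with MatSetValues(), then ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
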
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts belonging to the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
  else m = PetscMPIIntCast(newmat->rmap->n);
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts belonging to the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
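
/*
   Illustrative usage sketch (editorial addition): how the loader above is reached from user code.  The
   file name "dense.dat" is only a placeholder; MatLoad() dispatches to MatLoad_MPIDense() once the
   matrix type has been set to MATMPIDENSE (or to MATDENSE on more than one process).

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
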
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition,
  if PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD,
  these subcommunicators cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscSysInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
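
/*
   Illustrative note (editorial addition): the processor-mesh and error-checking settings read by
   PetscPLAPACKInitializePackage() above are ordinary options-database entries, so they can be given
   on the command line, e.g.

      mpiexec -n 4 ./app -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_nb_alg 128

   or set programmatically before the first MATMPIDENSE matrix is created:

      ierr = PetscOptionsSetValue("-mat_plapack_nprows","2");CHKERRQ(ierr);
      ierr = PetscOptionsSetValue("-mat_plapack_npcols","2");CHKERRQ(ierr);

   The value 128 for -mat_plapack_nb_alg is only a placeholder; suitable block sizes are machine dependent.
*/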