1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #if defined(PETSC_HAVE_PLAPACK) 9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 10 static MPI_Comm Plapack_comm_2d; 11 EXTERN_C_BEGIN 12 #include <PLA.h> 13 EXTERN_C_END 14 15 typedef struct { 16 PLA_Obj A,pivots; 17 PLA_Template templ; 18 MPI_Datatype datatype; 19 PetscInt nb,rstart; 20 VecScatter ctx; 21 IS is_pla,is_petsc; 22 PetscBool pla_solved; 23 MatStructure mstruct; 24 } Mat_Plapack; 25 #endif 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatDenseGetLocalMatrix" 29 /*@ 30 31 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 32 matrix that represents the operator. For sequential matrices it returns itself. 33 34 Input Parameter: 35 . A - the Seq or MPI dense matrix 36 37 Output Parameter: 38 . B - the inner matrix 39 40 Level: intermediate 41 42 @*/ 43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscBool flg; 48 49 PetscFunctionBegin; 50 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 51 if (flg) { 52 *B = mat->A; 53 } else { 54 *B = A; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatGetRow_MPIDense" 61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 62 { 63 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 66 67 PetscFunctionBegin; 68 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 69 lrow = row - rstart; 70 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatRestoreRow_MPIDense" 76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 77 { 78 PetscErrorCode ierr; 79 80 PetscFunctionBegin; 81 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 82 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 83 PetscFunctionReturn(0); 84 } 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 89 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 90 { 91 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 92 PetscErrorCode ierr; 93 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 94 PetscScalar *array; 95 MPI_Comm comm; 96 Mat B; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 102 if (!B) { 103 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 104 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 105 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 106 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 107 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 108 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 109 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 110 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 111 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 113 *a = B; 114 ierr = MatDestroy(&B);CHKERRQ(ierr); 115 } else { 116 *a = B; 117 } 118 PetscFunctionReturn(0); 119 } 120 EXTERN_C_END 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatSetValues_MPIDense" 124 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 125 { 126 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 127 PetscErrorCode ierr; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 PetscBool roworiented = A->roworiented; 130 131 PetscFunctionBegin; 132 if (v) PetscValidScalarPointer(v,6); 133 for (i=0; i<m; i++) { 134 if (idxm[i] < 0) continue; 135 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 136 if (idxm[i] >= rstart && idxm[i] < rend) { 137 row = idxm[i] - rstart; 138 if (roworiented) { 139 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 140 } else { 141 for (j=0; j<n; j++) { 142 if (idxn[j] < 0) continue; 143 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 144 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 145 } 146 } 147 } else { 148 if (!A->donotstash) { 149 if (roworiented) { 150 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 151 } else { 152 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 153 } 154 } 155 } 156 } 157 PetscFunctionReturn(0); 158 } 159 160 #undef __FUNCT__ 161 #define __FUNCT__ "MatGetValues_MPIDense" 162 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 163 { 164 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 165 PetscErrorCode ierr; 166 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 167 168 PetscFunctionBegin; 169 for (i=0; i<m; i++) { 170 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 171 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 172 if (idxm[i] >= rstart && idxm[i] < rend) { 173 row = idxm[i] - rstart; 174 for (j=0; j<n; j++) { 175 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 176 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 177 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 178 } 179 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 180 } 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatGetArray_MPIDense" 186 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 198 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 199 { 200 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 201 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 202 PetscErrorCode ierr; 203 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 204 const PetscInt *irow,*icol; 205 PetscScalar *av,*bv,*v = lmat->v; 206 Mat newmat; 207 IS iscol_local; 208 209 PetscFunctionBegin; 210 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 211 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 212 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 213 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 214 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 215 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 216 217 /* No parallel redistribution currently supported! Should really check each index set 218 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 219 original matrix! */ 220 221 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 223 224 /* Check submatrix call */ 225 if (scall == MAT_REUSE_MATRIX) { 226 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 227 /* Really need to test rows and column sizes! */ 228 newmat = *B; 229 } else { 230 /* Create and fill new matrix */ 231 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 232 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 233 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 234 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 235 } 236 237 /* Now extract the data pointers and do the copy, column at a time */ 238 newmatd = (Mat_MPIDense*)newmat->data; 239 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 240 241 for (i=0; i<Ncols; i++) { 242 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 243 for (j=0; j<nrows; j++) { 244 *bv++ = av[irow[j] - rstart]; 245 } 246 } 247 248 /* Assemble the matrices so that the correct flags are set */ 249 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 250 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 252 /* Free work space */ 253 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 254 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 255 *B = newmat; 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreArray_MPIDense" 261 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 262 { 263 PetscFunctionBegin; 264 PetscFunctionReturn(0); 265 } 266 267 #undef __FUNCT__ 268 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 269 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 270 { 271 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 272 MPI_Comm comm = ((PetscObject)mat)->comm; 273 PetscErrorCode ierr; 274 PetscInt nstash,reallocs; 275 InsertMode addv; 276 277 PetscFunctionBegin; 278 /* make sure all processors are either in INSERTMODE or ADDMODE */ 279 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 280 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 281 mat->insertmode = addv; /* in case this processor had no cache */ 282 283 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 284 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 285 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 #undef __FUNCT__ 290 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 291 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 292 { 293 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 294 PetscErrorCode ierr; 295 PetscInt i,*row,*col,flg,j,rstart,ncols; 296 PetscMPIInt n; 297 PetscScalar *val; 298 InsertMode addv=mat->insertmode; 299 300 PetscFunctionBegin; 301 /* wait on receives */ 302 while (1) { 303 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 304 if (!flg) break; 305 306 for (i=0; i<n;) { 307 /* Now identify the consecutive vals belonging to the same row */ 308 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 309 if (j < n) ncols = j-i; 310 else ncols = n-i; 311 /* Now assemble all these values with a single function call */ 312 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 313 i = j; 314 } 315 } 316 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 317 318 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 320 321 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 322 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 323 } 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroEntries_MPIDense" 329 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 330 { 331 PetscErrorCode ierr; 332 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 333 334 PetscFunctionBegin; 335 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 /* the code does not do the diagonal entries correctly unless the 340 matrix is square and the column and row owerships are identical. 341 This is a BUG. The only way to fix it seems to be to access 342 mdn->A and mdn->B directly and not through the MatZeroRows() 343 routine. 344 */ 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatZeroRows_MPIDense" 347 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 348 { 349 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 350 PetscErrorCode ierr; 351 PetscInt i,*owners = A->rmap->range; 352 PetscInt *nprocs,j,idx,nsends; 353 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 354 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 355 PetscInt *lens,*lrows,*values; 356 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 357 MPI_Comm comm = ((PetscObject)A)->comm; 358 MPI_Request *send_waits,*recv_waits; 359 MPI_Status recv_status,*send_status; 360 PetscBool found; 361 const PetscScalar *xx; 362 PetscScalar *bb; 363 364 PetscFunctionBegin; 365 /* first count number of contributors to each processor */ 366 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 367 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 368 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 369 for (i=0; i<N; i++) { 370 idx = rows[i]; 371 found = PETSC_FALSE; 372 for (j=0; j<size; j++) { 373 if (idx >= owners[j] && idx < owners[j+1]) { 374 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 375 } 376 } 377 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 378 } 379 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 380 381 /* inform other processors of number of messages and max length*/ 382 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 383 384 /* post receives: */ 385 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 386 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 387 for (i=0; i<nrecvs; i++) { 388 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 389 } 390 391 /* do sends: 392 1) starts[i] gives the starting index in svalues for stuff going to 393 the ith processor 394 */ 395 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 396 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 397 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 398 starts[0] = 0; 399 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 400 for (i=0; i<N; i++) { 401 svalues[starts[owner[i]]++] = rows[i]; 402 } 403 404 starts[0] = 0; 405 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 406 count = 0; 407 for (i=0; i<size; i++) { 408 if (nprocs[2*i+1]) { 409 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 410 } 411 } 412 ierr = PetscFree(starts);CHKERRQ(ierr); 413 414 base = owners[rank]; 415 416 /* wait on receives */ 417 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 418 count = nrecvs; 419 slen = 0; 420 while (count) { 421 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 422 /* unpack receives into our local space */ 423 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 424 source[imdex] = recv_status.MPI_SOURCE; 425 lens[imdex] = n; 426 slen += n; 427 count--; 428 } 429 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 430 431 /* move the data into the send scatter */ 432 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 433 count = 0; 434 for (i=0; i<nrecvs; i++) { 435 values = rvalues + i*nmax; 436 for (j=0; j<lens[i]; j++) { 437 lrows[count++] = values[j] - base; 438 } 439 } 440 ierr = PetscFree(rvalues);CHKERRQ(ierr); 441 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 442 ierr = PetscFree(owner);CHKERRQ(ierr); 443 ierr = PetscFree(nprocs);CHKERRQ(ierr); 444 445 /* fix right hand side if needed */ 446 if (x && b) { 447 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 448 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 449 for (i=0; i<slen; i++) { 450 bb[lrows[i]] = diag*xx[lrows[i]]; 451 } 452 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 453 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 454 } 455 456 /* actually zap the local rows */ 457 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 458 ierr = PetscFree(lrows);CHKERRQ(ierr); 459 460 /* wait on sends */ 461 if (nsends) { 462 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 463 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 464 ierr = PetscFree(send_status);CHKERRQ(ierr); 465 } 466 ierr = PetscFree(send_waits);CHKERRQ(ierr); 467 ierr = PetscFree(svalues);CHKERRQ(ierr); 468 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMult_MPIDense" 474 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 475 { 476 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatMultAdd_MPIDense" 488 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 489 { 490 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 491 PetscErrorCode ierr; 492 493 PetscFunctionBegin; 494 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 495 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTranspose_MPIDense" 502 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 PetscScalar zero = 0.0; 507 508 PetscFunctionBegin; 509 ierr = VecSet(yy,zero);CHKERRQ(ierr); 510 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 511 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 513 PetscFunctionReturn(0); 514 } 515 516 #undef __FUNCT__ 517 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 518 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 519 { 520 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 521 PetscErrorCode ierr; 522 523 PetscFunctionBegin; 524 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 525 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 526 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 527 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatGetDiagonal_MPIDense" 533 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 534 { 535 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 536 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 537 PetscErrorCode ierr; 538 PetscInt len,i,n,m = A->rmap->n,radd; 539 PetscScalar *x,zero = 0.0; 540 541 PetscFunctionBegin; 542 ierr = VecSet(v,zero);CHKERRQ(ierr); 543 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 544 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 545 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 546 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 547 radd = A->rmap->rstart*m; 548 for (i=0; i<len; i++) { 549 x[i] = aloc->v[radd + i*m + i]; 550 } 551 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatDestroy_MPIDense" 557 PetscErrorCode MatDestroy_MPIDense(Mat mat) 558 { 559 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 560 PetscErrorCode ierr; 561 #if defined(PETSC_HAVE_PLAPACK) 562 Mat_Plapack *lu=(Mat_Plapack*)mat->spptr; 563 #endif 564 565 PetscFunctionBegin; 566 567 #if defined(PETSC_USE_LOG) 568 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 569 #endif 570 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 571 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 572 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 574 #if defined(PETSC_HAVE_PLAPACK) 575 if (lu) { 576 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 577 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 578 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 579 ierr = ISDestroy(&lu->is_pla);CHKERRQ(ierr); 580 ierr = ISDestroy(&lu->is_petsc);CHKERRQ(ierr); 581 ierr = VecScatterDestroy(&lu->ctx);CHKERRQ(ierr); 582 } 583 ierr = PetscFree(mat->spptr);CHKERRQ(ierr); 584 #endif 585 586 ierr = PetscFree(mat->data);CHKERRQ(ierr); 587 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatView_MPIDense_Binary" 598 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 599 { 600 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 601 PetscErrorCode ierr; 602 PetscViewerFormat format; 603 int fd; 604 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 605 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 606 PetscScalar *work,*v,*vv; 607 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 608 MPI_Status status; 609 610 PetscFunctionBegin; 611 if (mdn->size == 1) { 612 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 613 } else { 614 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 615 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 616 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 617 618 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 619 if (format == PETSC_VIEWER_NATIVE) { 620 621 if (!rank) { 622 /* store the matrix as a dense matrix */ 623 header[0] = MAT_FILE_CLASSID; 624 header[1] = mat->rmap->N; 625 header[2] = N; 626 header[3] = MATRIX_BINARY_FORMAT_DENSE; 627 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 628 629 /* get largest work array needed for transposing array */ 630 mmax = mat->rmap->n; 631 for (i=1; i<size; i++) { 632 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 633 } 634 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 635 636 /* write out local array, by rows */ 637 m = mat->rmap->n; 638 v = a->v; 639 for (j=0; j<N; j++) { 640 for (i=0; i<m; i++) { 641 work[j + i*N] = *v++; 642 } 643 } 644 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 645 /* get largest work array to receive messages from other processes, excludes process zero */ 646 mmax = 0; 647 for (i=1; i<size; i++) { 648 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 649 } 650 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 651 for(k = 1; k < size; k++) { 652 v = vv; 653 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 654 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 655 656 for(j = 0; j < N; j++) { 657 for(i = 0; i < m; i++) { 658 work[j + i*N] = *v++; 659 } 660 } 661 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 662 } 663 ierr = PetscFree(work);CHKERRQ(ierr); 664 ierr = PetscFree(vv);CHKERRQ(ierr); 665 } else { 666 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 670 } 671 } 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 677 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 678 { 679 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 680 PetscErrorCode ierr; 681 PetscMPIInt size = mdn->size,rank = mdn->rank; 682 const PetscViewerType vtype; 683 PetscBool iascii,isdraw; 684 PetscViewer sviewer; 685 PetscViewerFormat format; 686 #if defined(PETSC_HAVE_PLAPACK) 687 Mat_Plapack *lu; 688 #endif 689 690 PetscFunctionBegin; 691 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 692 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 693 if (iascii) { 694 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 695 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 696 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 697 MatInfo info; 698 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 700 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 701 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 702 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 703 #if defined(PETSC_HAVE_PLAPACK) 704 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 705 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 706 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 707 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 708 if (mat->factortype){ 709 lu=(Mat_Plapack*)(mat->spptr); 710 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 711 } 712 #else 713 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 714 #endif 715 PetscFunctionReturn(0); 716 } else if (format == PETSC_VIEWER_ASCII_INFO) { 717 PetscFunctionReturn(0); 718 } 719 } else if (isdraw) { 720 PetscDraw draw; 721 PetscBool isnull; 722 723 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 724 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 725 if (isnull) PetscFunctionReturn(0); 726 } 727 728 if (size == 1) { 729 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 730 } else { 731 /* assemble the entire matrix onto first processor. */ 732 Mat A; 733 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 734 PetscInt *cols; 735 PetscScalar *vals; 736 737 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 738 if (!rank) { 739 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 740 } else { 741 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 742 } 743 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 744 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 745 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 746 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 747 748 /* Copy the matrix ... This isn't the most efficient means, 749 but it's quick for now */ 750 A->insertmode = INSERT_VALUES; 751 row = mat->rmap->rstart; m = mdn->A->rmap->n; 752 for (i=0; i<m; i++) { 753 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 754 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 755 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 756 row++; 757 } 758 759 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 760 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 761 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 762 if (!rank) { 763 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 764 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 765 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 766 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 767 } 768 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 769 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 770 ierr = MatDestroy(&A);CHKERRQ(ierr); 771 } 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "MatView_MPIDense" 777 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 778 { 779 PetscErrorCode ierr; 780 PetscBool iascii,isbinary,isdraw,issocket; 781 782 PetscFunctionBegin; 783 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 787 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 788 789 if (iascii || issocket || isdraw) { 790 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 791 } else if (isbinary) { 792 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 793 } else { 794 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNCT__ 800 #define __FUNCT__ "MatGetInfo_MPIDense" 801 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 802 { 803 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 804 Mat mdn = mat->A; 805 PetscErrorCode ierr; 806 PetscReal isend[5],irecv[5]; 807 808 PetscFunctionBegin; 809 info->block_size = 1.0; 810 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 811 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 812 isend[3] = info->memory; isend[4] = info->mallocs; 813 if (flag == MAT_LOCAL) { 814 info->nz_used = isend[0]; 815 info->nz_allocated = isend[1]; 816 info->nz_unneeded = isend[2]; 817 info->memory = isend[3]; 818 info->mallocs = isend[4]; 819 } else if (flag == MAT_GLOBAL_MAX) { 820 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 821 info->nz_used = irecv[0]; 822 info->nz_allocated = irecv[1]; 823 info->nz_unneeded = irecv[2]; 824 info->memory = irecv[3]; 825 info->mallocs = irecv[4]; 826 } else if (flag == MAT_GLOBAL_SUM) { 827 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 828 info->nz_used = irecv[0]; 829 info->nz_allocated = irecv[1]; 830 info->nz_unneeded = irecv[2]; 831 info->memory = irecv[3]; 832 info->mallocs = irecv[4]; 833 } 834 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 835 info->fill_ratio_needed = 0; 836 info->factor_mallocs = 0; 837 PetscFunctionReturn(0); 838 } 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatSetOption_MPIDense" 842 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 843 { 844 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 845 PetscErrorCode ierr; 846 847 PetscFunctionBegin; 848 switch (op) { 849 case MAT_NEW_NONZERO_LOCATIONS: 850 case MAT_NEW_NONZERO_LOCATION_ERR: 851 case MAT_NEW_NONZERO_ALLOCATION_ERR: 852 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 853 break; 854 case MAT_ROW_ORIENTED: 855 a->roworiented = flg; 856 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 857 break; 858 case MAT_NEW_DIAGONALS: 859 case MAT_USE_HASH_TABLE: 860 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 861 break; 862 case MAT_IGNORE_OFF_PROC_ENTRIES: 863 a->donotstash = flg; 864 break; 865 case MAT_SYMMETRIC: 866 case MAT_STRUCTURALLY_SYMMETRIC: 867 case MAT_HERMITIAN: 868 case MAT_SYMMETRY_ETERNAL: 869 case MAT_IGNORE_LOWER_TRIANGULAR: 870 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 871 break; 872 default: 873 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 874 } 875 PetscFunctionReturn(0); 876 } 877 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "MatDiagonalScale_MPIDense" 881 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 882 { 883 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 884 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 885 PetscScalar *l,*r,x,*v; 886 PetscErrorCode ierr; 887 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 888 889 PetscFunctionBegin; 890 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 891 if (ll) { 892 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 893 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 894 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 895 for (i=0; i<m; i++) { 896 x = l[i]; 897 v = mat->v + i; 898 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 899 } 900 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 901 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 902 } 903 if (rr) { 904 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 905 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 906 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 909 for (i=0; i<n; i++) { 910 x = r[i]; 911 v = mat->v + i*m; 912 for (j=0; j<m; j++) { (*v++) *= x;} 913 } 914 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 915 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 916 } 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatNorm_MPIDense" 922 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 923 { 924 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 925 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 926 PetscErrorCode ierr; 927 PetscInt i,j; 928 PetscReal sum = 0.0; 929 PetscScalar *v = mat->v; 930 931 PetscFunctionBegin; 932 if (mdn->size == 1) { 933 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 934 } else { 935 if (type == NORM_FROBENIUS) { 936 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 937 #if defined(PETSC_USE_COMPLEX) 938 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 939 #else 940 sum += (*v)*(*v); v++; 941 #endif 942 } 943 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 944 *nrm = sqrt(*nrm); 945 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 946 } else if (type == NORM_1) { 947 PetscReal *tmp,*tmp2; 948 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 949 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 950 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 951 *nrm = 0.0; 952 v = mat->v; 953 for (j=0; j<mdn->A->cmap->n; j++) { 954 for (i=0; i<mdn->A->rmap->n; i++) { 955 tmp[j] += PetscAbsScalar(*v); v++; 956 } 957 } 958 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 959 for (j=0; j<A->cmap->N; j++) { 960 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 961 } 962 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 963 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 964 } else if (type == NORM_INFINITY) { /* max row norm */ 965 PetscReal ntemp; 966 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 967 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 968 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 969 } 970 PetscFunctionReturn(0); 971 } 972 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatTranspose_MPIDense" 975 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 976 { 977 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 978 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 979 Mat B; 980 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 981 PetscErrorCode ierr; 982 PetscInt j,i; 983 PetscScalar *v; 984 985 PetscFunctionBegin; 986 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 987 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 988 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 989 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 990 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 991 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 992 } else { 993 B = *matout; 994 } 995 996 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 997 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 998 for (i=0; i<m; i++) rwork[i] = rstart + i; 999 for (j=0; j<n; j++) { 1000 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 1001 v += m; 1002 } 1003 ierr = PetscFree(rwork);CHKERRQ(ierr); 1004 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1006 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1007 *matout = B; 1008 } else { 1009 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 1015 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1016 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1020 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1021 { 1022 PetscErrorCode ierr; 1023 1024 PetscFunctionBegin; 1025 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #if defined(PETSC_HAVE_PLAPACK) 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1033 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1034 { 1035 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1036 PetscErrorCode ierr; 1037 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1038 PetscScalar *array; 1039 PetscReal one = 1.0; 1040 1041 PetscFunctionBegin; 1042 /* Copy A into F->lu->A */ 1043 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1044 ierr = PLA_API_begin();CHKERRQ(ierr); 1045 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1046 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1047 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1048 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1049 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1050 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1051 ierr = PLA_API_end();CHKERRQ(ierr); 1052 lu->rstart = rstart; 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1058 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1059 { 1060 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1061 PetscErrorCode ierr; 1062 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1063 PetscScalar *array; 1064 PetscReal one = 1.0; 1065 1066 PetscFunctionBegin; 1067 /* Copy F into A->lu->A */ 1068 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1069 ierr = PLA_API_begin();CHKERRQ(ierr); 1070 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1071 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1072 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1073 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1074 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1075 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1076 ierr = PLA_API_end();CHKERRQ(ierr); 1077 lu->rstart = rstart; 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1083 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1084 { 1085 PetscErrorCode ierr; 1086 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1087 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1088 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1089 PLA_Obj alpha = NULL,beta = NULL; 1090 1091 PetscFunctionBegin; 1092 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1093 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1094 1095 /* 1096 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1097 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1098 */ 1099 1100 /* do the multiply in PLA */ 1101 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1102 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1103 CHKMEMQ; 1104 1105 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1106 CHKMEMQ; 1107 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1108 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1109 1110 /* 1111 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1112 */ 1113 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1119 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1120 { 1121 PetscErrorCode ierr; 1122 PetscInt m=A->rmap->n,n=B->cmap->n; 1123 Mat Cmat; 1124 1125 PetscFunctionBegin; 1126 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1127 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1128 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1129 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1130 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1131 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1132 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1133 1134 *C = Cmat; 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1140 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1141 { 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 if (scall == MAT_INITIAL_MATRIX){ 1146 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1147 } 1148 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatSolve_MPIDense" 1154 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1155 { 1156 MPI_Comm comm = ((PetscObject)A)->comm; 1157 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1158 PetscErrorCode ierr; 1159 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1160 PetscScalar *array; 1161 PetscReal one = 1.0; 1162 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1163 PLA_Obj v_pla = NULL; 1164 PetscScalar *loc_buf; 1165 Vec loc_x; 1166 1167 PetscFunctionBegin; 1168 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1169 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1170 1171 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1172 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1173 PLA_Obj_set_to_zero(v_pla); 1174 1175 /* Copy b into rhs_pla */ 1176 PLA_API_begin(); 1177 PLA_Obj_API_open(v_pla); 1178 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1179 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1180 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1181 PLA_Obj_API_close(v_pla); 1182 PLA_API_end(); 1183 1184 if (A->factortype == MAT_FACTOR_LU){ 1185 /* Apply the permutations to the right hand sides */ 1186 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1187 1188 /* Solve L y = b, overwriting b with y */ 1189 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1190 1191 /* Solve U x = y (=b), overwriting b with x */ 1192 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1193 } else { /* MAT_FACTOR_CHOLESKY */ 1194 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1195 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1196 PLA_NONUNIT_DIAG,lu->A,v_pla); 1197 } 1198 1199 /* Copy PLAPACK x into Petsc vector x */ 1200 PLA_Obj_local_length(v_pla, &loc_m); 1201 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1202 PLA_Obj_local_stride(v_pla, &loc_stride); 1203 /* 1204 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1205 */ 1206 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1207 if (!lu->pla_solved){ 1208 1209 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1210 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1211 1212 /* Create IS and cts for VecScatterring */ 1213 PLA_Obj_local_length(v_pla, &loc_m); 1214 PLA_Obj_local_stride(v_pla, &loc_stride); 1215 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1216 1217 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1218 for (i=0; i<loc_m; i+=lu->nb){ 1219 j = 0; 1220 while (j < lu->nb && i+j < loc_m){ 1221 idx_petsc[i+j] = rstart + j; j++; 1222 } 1223 rstart += size*lu->nb; 1224 } 1225 1226 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1227 1228 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr); 1229 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr); 1230 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1231 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1232 } 1233 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 1236 /* Free data */ 1237 ierr = VecDestroy(&loc_x);CHKERRQ(ierr); 1238 PLA_Obj_free(&v_pla); 1239 1240 lu->pla_solved = PETSC_TRUE; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1246 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1247 { 1248 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1249 PetscErrorCode ierr; 1250 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1251 PetscInt info_pla=0; 1252 PetscScalar *array,one = 1.0; 1253 1254 PetscFunctionBegin; 1255 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1256 PLA_Obj_free(&lu->A); 1257 PLA_Obj_free (&lu->pivots); 1258 } 1259 /* Create PLAPACK matrix object */ 1260 lu->A = NULL; lu->pivots = NULL; 1261 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1262 PLA_Obj_set_to_zero(lu->A); 1263 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1264 1265 /* Copy A into lu->A */ 1266 PLA_API_begin(); 1267 PLA_Obj_API_open(lu->A); 1268 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1269 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1270 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1271 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1272 PLA_Obj_API_close(lu->A); 1273 PLA_API_end(); 1274 1275 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1276 info_pla = PLA_LU(lu->A,lu->pivots); 1277 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1278 1279 lu->rstart = rstart; 1280 lu->mstruct = SAME_NONZERO_PATTERN; 1281 F->ops->solve = MatSolve_MPIDense; 1282 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1288 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1289 { 1290 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1291 PetscErrorCode ierr; 1292 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1293 PetscInt info_pla=0; 1294 PetscScalar *array,one = 1.0; 1295 1296 PetscFunctionBegin; 1297 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1298 PLA_Obj_free(&lu->A); 1299 } 1300 /* Create PLAPACK matrix object */ 1301 lu->A = NULL; 1302 lu->pivots = NULL; 1303 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1304 1305 /* Copy A into lu->A */ 1306 PLA_API_begin(); 1307 PLA_Obj_API_open(lu->A); 1308 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1309 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1310 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1311 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1312 PLA_Obj_API_close(lu->A); 1313 PLA_API_end(); 1314 1315 /* Factor P A -> Chol */ 1316 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1317 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1318 1319 lu->rstart = rstart; 1320 lu->mstruct = SAME_NONZERO_PATTERN; 1321 F->ops->solve = MatSolve_MPIDense; 1322 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* Note the Petsc perm permutation is ignored */ 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1329 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1330 { 1331 PetscErrorCode ierr; 1332 PetscBool issymmetric,set; 1333 1334 PetscFunctionBegin; 1335 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1336 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1337 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /* Note the Petsc r and c permutations are ignored */ 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1344 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1345 { 1346 PetscErrorCode ierr; 1347 PetscInt M = A->rmap->N; 1348 Mat_Plapack *lu; 1349 1350 PetscFunctionBegin; 1351 lu = (Mat_Plapack*)F->spptr; 1352 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1353 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1354 PetscFunctionReturn(0); 1355 } 1356 1357 EXTERN_C_BEGIN 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1360 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1361 { 1362 PetscFunctionBegin; 1363 *type = MATSOLVERPLAPACK; 1364 PetscFunctionReturn(0); 1365 } 1366 EXTERN_C_END 1367 1368 EXTERN_C_BEGIN 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1371 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1372 { 1373 PetscErrorCode ierr; 1374 Mat_Plapack *lu; 1375 PetscMPIInt size; 1376 PetscInt M=A->rmap->N; 1377 1378 PetscFunctionBegin; 1379 /* Create the factorization matrix */ 1380 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1381 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1382 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1383 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1384 (*F)->spptr = (void*)lu; 1385 1386 /* Set default Plapack parameters */ 1387 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1388 lu->nb = M/size; 1389 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1390 1391 /* Set runtime options */ 1392 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1393 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1394 PetscOptionsEnd(); 1395 1396 /* Create object distribution template */ 1397 lu->templ = NULL; 1398 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1399 1400 /* Set the datatype */ 1401 #if defined(PETSC_USE_COMPLEX) 1402 lu->datatype = MPI_DOUBLE_COMPLEX; 1403 #else 1404 lu->datatype = MPI_DOUBLE; 1405 #endif 1406 1407 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1408 1409 1410 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1411 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1412 1413 if (ftype == MAT_FACTOR_LU) { 1414 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1415 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1416 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1417 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1418 (*F)->factortype = ftype; 1419 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 EXTERN_C_END 1423 #endif 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1427 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1428 { 1429 #if defined(PETSC_HAVE_PLAPACK) 1430 PetscErrorCode ierr; 1431 #endif 1432 1433 PetscFunctionBegin; 1434 #if defined(PETSC_HAVE_PLAPACK) 1435 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1436 #else 1437 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1438 #endif 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatAXPY_MPIDense" 1444 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1445 { 1446 PetscErrorCode ierr; 1447 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1448 1449 PetscFunctionBegin; 1450 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatConjugate_MPIDense" 1456 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1457 { 1458 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1459 PetscErrorCode ierr; 1460 1461 PetscFunctionBegin; 1462 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatRealPart_MPIDense" 1468 PetscErrorCode MatRealPart_MPIDense(Mat A) 1469 { 1470 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1480 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1481 { 1482 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1483 PetscErrorCode ierr; 1484 1485 PetscFunctionBegin; 1486 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1487 PetscFunctionReturn(0); 1488 } 1489 1490 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1493 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1494 { 1495 PetscErrorCode ierr; 1496 PetscInt i,n; 1497 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1498 PetscReal *work; 1499 1500 PetscFunctionBegin; 1501 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 1502 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1503 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1504 if (type == NORM_2) { 1505 for (i=0; i<n; i++) work[i] *= work[i]; 1506 } 1507 if (type == NORM_INFINITY) { 1508 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1509 } else { 1510 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1511 } 1512 ierr = PetscFree(work);CHKERRQ(ierr); 1513 if (type == NORM_2) { 1514 for (i=0; i<n; i++) norms[i] = sqrt(norms[i]); 1515 } 1516 PetscFunctionReturn(0); 1517 } 1518 1519 /* -------------------------------------------------------------------*/ 1520 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1521 MatGetRow_MPIDense, 1522 MatRestoreRow_MPIDense, 1523 MatMult_MPIDense, 1524 /* 4*/ MatMultAdd_MPIDense, 1525 MatMultTranspose_MPIDense, 1526 MatMultTransposeAdd_MPIDense, 1527 0, 1528 0, 1529 0, 1530 /*10*/ 0, 1531 0, 1532 0, 1533 0, 1534 MatTranspose_MPIDense, 1535 /*15*/ MatGetInfo_MPIDense, 1536 MatEqual_MPIDense, 1537 MatGetDiagonal_MPIDense, 1538 MatDiagonalScale_MPIDense, 1539 MatNorm_MPIDense, 1540 /*20*/ MatAssemblyBegin_MPIDense, 1541 MatAssemblyEnd_MPIDense, 1542 MatSetOption_MPIDense, 1543 MatZeroEntries_MPIDense, 1544 /*24*/ MatZeroRows_MPIDense, 1545 0, 1546 0, 1547 0, 1548 0, 1549 /*29*/ MatSetUpPreallocation_MPIDense, 1550 0, 1551 0, 1552 MatGetArray_MPIDense, 1553 MatRestoreArray_MPIDense, 1554 /*34*/ MatDuplicate_MPIDense, 1555 0, 1556 0, 1557 0, 1558 0, 1559 /*39*/ MatAXPY_MPIDense, 1560 MatGetSubMatrices_MPIDense, 1561 0, 1562 MatGetValues_MPIDense, 1563 0, 1564 /*44*/ 0, 1565 MatScale_MPIDense, 1566 0, 1567 0, 1568 0, 1569 /*49*/ 0, 1570 0, 1571 0, 1572 0, 1573 0, 1574 /*54*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*59*/ MatGetSubMatrix_MPIDense, 1580 MatDestroy_MPIDense, 1581 MatView_MPIDense, 1582 0, 1583 0, 1584 /*64*/ 0, 1585 0, 1586 0, 1587 0, 1588 0, 1589 /*69*/ 0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /*74*/ 0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 /*79*/ 0, 1600 0, 1601 0, 1602 0, 1603 /*83*/ MatLoad_MPIDense, 1604 0, 1605 0, 1606 0, 1607 0, 1608 0, 1609 /*89*/ 1610 #if defined(PETSC_HAVE_PLAPACK) 1611 MatMatMult_MPIDense_MPIDense, 1612 MatMatMultSymbolic_MPIDense_MPIDense, 1613 MatMatMultNumeric_MPIDense_MPIDense, 1614 #else 1615 0, 1616 0, 1617 0, 1618 #endif 1619 0, 1620 0, 1621 /*94*/ 0, 1622 0, 1623 0, 1624 0, 1625 0, 1626 /*99*/ 0, 1627 0, 1628 0, 1629 MatConjugate_MPIDense, 1630 0, 1631 /*104*/0, 1632 MatRealPart_MPIDense, 1633 MatImaginaryPart_MPIDense, 1634 0, 1635 0, 1636 /*109*/0, 1637 0, 1638 0, 1639 0, 1640 0, 1641 /*114*/0, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*119*/0, 1647 0, 1648 0, 1649 0, 1650 0, 1651 /*124*/0, 1652 MatGetColumnNorms_MPIDense 1653 }; 1654 1655 EXTERN_C_BEGIN 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1658 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1659 { 1660 Mat_MPIDense *a; 1661 PetscErrorCode ierr; 1662 1663 PetscFunctionBegin; 1664 mat->preallocated = PETSC_TRUE; 1665 /* Note: For now, when data is specified above, this assumes the user correctly 1666 allocates the local dense storage space. We should add error checking. */ 1667 1668 a = (Mat_MPIDense*)mat->data; 1669 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1670 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1671 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1672 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1673 a->nvec = mat->cmap->n; 1674 1675 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1676 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1677 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1678 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1679 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 EXTERN_C_END 1683 1684 /*MC 1685 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1686 1687 run ./configure with the option --download-plapack 1688 1689 1690 Options Database Keys: 1691 . -mat_plapack_nprows <n> - number of rows in processor partition 1692 . -mat_plapack_npcols <n> - number of columns in processor partition 1693 . -mat_plapack_nb <n> - block size of template vector 1694 . -mat_plapack_nb_alg <n> - algorithmic block size 1695 - -mat_plapack_ckerror <n> - error checking flag 1696 1697 Level: intermediate 1698 1699 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1700 1701 M*/ 1702 1703 EXTERN_C_BEGIN 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatCreate_MPIDense" 1706 PetscErrorCode MatCreate_MPIDense(Mat mat) 1707 { 1708 Mat_MPIDense *a; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1713 mat->data = (void*)a; 1714 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1715 1716 mat->insertmode = NOT_SET_VALUES; 1717 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1718 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1719 1720 /* build cache for off array entries formed */ 1721 a->donotstash = PETSC_FALSE; 1722 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1723 1724 /* stuff used for matrix vector multiply */ 1725 a->lvec = 0; 1726 a->Mvctx = 0; 1727 a->roworiented = PETSC_TRUE; 1728 1729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1730 "MatGetDiagonalBlock_MPIDense", 1731 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1732 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1733 "MatMPIDenseSetPreallocation_MPIDense", 1734 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1736 "MatMatMult_MPIAIJ_MPIDense", 1737 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1738 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1739 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1740 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1742 "MatMatMultNumeric_MPIAIJ_MPIDense", 1743 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1744 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1745 "MatGetFactor_mpidense_petsc", 1746 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1747 #if defined(PETSC_HAVE_PLAPACK) 1748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1749 "MatGetFactor_mpidense_plapack", 1750 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1751 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1752 #endif 1753 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1754 1755 PetscFunctionReturn(0); 1756 } 1757 EXTERN_C_END 1758 1759 /*MC 1760 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1761 1762 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1763 and MATMPIDENSE otherwise. 1764 1765 Options Database Keys: 1766 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1767 1768 Level: beginner 1769 1770 1771 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1772 M*/ 1773 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1776 /*@C 1777 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1778 1779 Not collective 1780 1781 Input Parameters: 1782 . A - the matrix 1783 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1784 to control all matrix memory allocation. 1785 1786 Notes: 1787 The dense format is fully compatible with standard Fortran 77 1788 storage by columns. 1789 1790 The data input variable is intended primarily for Fortran programmers 1791 who wish to allocate their own matrix memory space. Most users should 1792 set data=PETSC_NULL. 1793 1794 Level: intermediate 1795 1796 .keywords: matrix,dense, parallel 1797 1798 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1799 @*/ 1800 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1801 { 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatCreateMPIDense" 1811 /*@C 1812 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1813 1814 Collective on MPI_Comm 1815 1816 Input Parameters: 1817 + comm - MPI communicator 1818 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1819 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1820 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1821 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1822 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1823 to control all matrix memory allocation. 1824 1825 Output Parameter: 1826 . A - the matrix 1827 1828 Notes: 1829 The dense format is fully compatible with standard Fortran 77 1830 storage by columns. 1831 1832 The data input variable is intended primarily for Fortran programmers 1833 who wish to allocate their own matrix memory space. Most users should 1834 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1835 1836 The user MUST specify either the local or global matrix dimensions 1837 (possibly both). 1838 1839 Level: intermediate 1840 1841 .keywords: matrix,dense, parallel 1842 1843 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1844 @*/ 1845 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1846 { 1847 PetscErrorCode ierr; 1848 PetscMPIInt size; 1849 1850 PetscFunctionBegin; 1851 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1852 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1853 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1854 if (size > 1) { 1855 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1856 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1857 } else { 1858 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1859 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatDuplicate_MPIDense" 1866 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1867 { 1868 Mat mat; 1869 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1870 PetscErrorCode ierr; 1871 1872 PetscFunctionBegin; 1873 *newmat = 0; 1874 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1875 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1876 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1877 a = (Mat_MPIDense*)mat->data; 1878 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1879 1880 mat->factortype = A->factortype; 1881 mat->assembled = PETSC_TRUE; 1882 mat->preallocated = PETSC_TRUE; 1883 1884 a->size = oldmat->size; 1885 a->rank = oldmat->rank; 1886 mat->insertmode = NOT_SET_VALUES; 1887 a->nvec = oldmat->nvec; 1888 a->donotstash = oldmat->donotstash; 1889 1890 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1891 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1892 1893 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1894 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1895 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1896 1897 *newmat = mat; 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1903 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1904 { 1905 PetscErrorCode ierr; 1906 PetscMPIInt rank,size; 1907 PetscInt *rowners,i,m,nz,j; 1908 PetscScalar *array,*vals,*vals_ptr; 1909 MPI_Status status; 1910 1911 PetscFunctionBegin; 1912 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1914 1915 /* determine ownership of all rows */ 1916 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1917 else m = newmat->rmap->n; 1918 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1919 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1920 rowners[0] = 0; 1921 for (i=2; i<=size; i++) { 1922 rowners[i] += rowners[i-1]; 1923 } 1924 1925 if (!sizesset) { 1926 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1927 } 1928 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1929 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1930 1931 if (!rank) { 1932 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1933 1934 /* read in my part of the matrix numerical values */ 1935 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1936 1937 /* insert into matrix-by row (this is why cannot directly read into array */ 1938 vals_ptr = vals; 1939 for (i=0; i<m; i++) { 1940 for (j=0; j<N; j++) { 1941 array[i + j*m] = *vals_ptr++; 1942 } 1943 } 1944 1945 /* read in other processors and ship out */ 1946 for (i=1; i<size; i++) { 1947 nz = (rowners[i+1] - rowners[i])*N; 1948 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1949 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1950 } 1951 } else { 1952 /* receive numeric values */ 1953 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1954 1955 /* receive message of values*/ 1956 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 1957 1958 /* insert into matrix-by row (this is why cannot directly read into array */ 1959 vals_ptr = vals; 1960 for (i=0; i<m; i++) { 1961 for (j=0; j<N; j++) { 1962 array[i + j*m] = *vals_ptr++; 1963 } 1964 } 1965 } 1966 ierr = PetscFree(rowners);CHKERRQ(ierr); 1967 ierr = PetscFree(vals);CHKERRQ(ierr); 1968 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1969 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatLoad_MPIDense" 1975 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1976 { 1977 PetscScalar *vals,*svals; 1978 MPI_Comm comm = ((PetscObject)viewer)->comm; 1979 MPI_Status status; 1980 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1981 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1982 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1983 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1984 int fd; 1985 PetscErrorCode ierr; 1986 1987 PetscFunctionBegin; 1988 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1989 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1990 if (!rank) { 1991 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1992 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1993 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1994 } 1995 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1996 1997 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1998 M = header[1]; N = header[2]; nz = header[3]; 1999 2000 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2001 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2002 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2003 2004 /* If global sizes are set, check if they are consistent with that given in the file */ 2005 if (sizesset) { 2006 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2007 } 2008 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2009 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2010 2011 /* 2012 Handle case where matrix is stored on disk as a dense matrix 2013 */ 2014 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 2015 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 2016 PetscFunctionReturn(0); 2017 } 2018 2019 /* determine ownership of all rows */ 2020 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 2021 else m = PetscMPIIntCast(newmat->rmap->n); 2022 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2023 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2024 rowners[0] = 0; 2025 for (i=2; i<=size; i++) { 2026 rowners[i] += rowners[i-1]; 2027 } 2028 rstart = rowners[rank]; 2029 rend = rowners[rank+1]; 2030 2031 /* distribute row lengths to all processors */ 2032 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2033 if (!rank) { 2034 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2035 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2036 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2037 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2038 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2039 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2040 } else { 2041 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2042 } 2043 2044 if (!rank) { 2045 /* calculate the number of nonzeros on each processor */ 2046 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2047 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2048 for (i=0; i<size; i++) { 2049 for (j=rowners[i]; j< rowners[i+1]; j++) { 2050 procsnz[i] += rowlengths[j]; 2051 } 2052 } 2053 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2054 2055 /* determine max buffer needed and allocate it */ 2056 maxnz = 0; 2057 for (i=0; i<size; i++) { 2058 maxnz = PetscMax(maxnz,procsnz[i]); 2059 } 2060 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2061 2062 /* read in my part of the matrix column indices */ 2063 nz = procsnz[0]; 2064 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2065 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2066 2067 /* read in every one elses and ship off */ 2068 for (i=1; i<size; i++) { 2069 nz = procsnz[i]; 2070 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2071 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2072 } 2073 ierr = PetscFree(cols);CHKERRQ(ierr); 2074 } else { 2075 /* determine buffer space needed for message */ 2076 nz = 0; 2077 for (i=0; i<m; i++) { 2078 nz += ourlens[i]; 2079 } 2080 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2081 2082 /* receive message of column indices*/ 2083 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2084 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2085 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2086 } 2087 2088 /* loop over local rows, determining number of off diagonal entries */ 2089 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2090 jj = 0; 2091 for (i=0; i<m; i++) { 2092 for (j=0; j<ourlens[i]; j++) { 2093 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2094 jj++; 2095 } 2096 } 2097 2098 /* create our matrix */ 2099 for (i=0; i<m; i++) { 2100 ourlens[i] -= offlens[i]; 2101 } 2102 2103 if (!sizesset) { 2104 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2105 } 2106 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2107 for (i=0; i<m; i++) { 2108 ourlens[i] += offlens[i]; 2109 } 2110 2111 if (!rank) { 2112 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2113 2114 /* read in my part of the matrix numerical values */ 2115 nz = procsnz[0]; 2116 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2117 2118 /* insert into matrix */ 2119 jj = rstart; 2120 smycols = mycols; 2121 svals = vals; 2122 for (i=0; i<m; i++) { 2123 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2124 smycols += ourlens[i]; 2125 svals += ourlens[i]; 2126 jj++; 2127 } 2128 2129 /* read in other processors and ship out */ 2130 for (i=1; i<size; i++) { 2131 nz = procsnz[i]; 2132 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2133 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2134 } 2135 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2136 } else { 2137 /* receive numeric values */ 2138 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2139 2140 /* receive message of values*/ 2141 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2142 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2143 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2144 2145 /* insert into matrix */ 2146 jj = rstart; 2147 smycols = mycols; 2148 svals = vals; 2149 for (i=0; i<m; i++) { 2150 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2151 smycols += ourlens[i]; 2152 svals += ourlens[i]; 2153 jj++; 2154 } 2155 } 2156 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2157 ierr = PetscFree(vals);CHKERRQ(ierr); 2158 ierr = PetscFree(mycols);CHKERRQ(ierr); 2159 ierr = PetscFree(rowners);CHKERRQ(ierr); 2160 2161 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatEqual_MPIDense" 2168 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2169 { 2170 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2171 Mat a,b; 2172 PetscBool flg; 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 a = matA->A; 2177 b = matB->A; 2178 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2179 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 2183 #if defined(PETSC_HAVE_PLAPACK) 2184 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2187 /*@C 2188 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2189 Level: developer 2190 2191 .keywords: Petsc, destroy, package, PLAPACK 2192 .seealso: PetscFinalize() 2193 @*/ 2194 PetscErrorCode PetscPLAPACKFinalizePackage(void) 2195 { 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 ierr = PLA_Finalize();CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2205 /*@C 2206 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2207 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2208 2209 Input Parameter: 2210 . comm - the communicator the matrix lives on 2211 2212 Level: developer 2213 2214 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2215 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2216 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2217 cannot overlap. 2218 2219 .keywords: Petsc, initialize, package, PLAPACK 2220 .seealso: PetscSysInitializePackage(), PetscInitialize() 2221 @*/ 2222 PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm) 2223 { 2224 PetscMPIInt size; 2225 PetscErrorCode ierr; 2226 2227 PetscFunctionBegin; 2228 if (!PLA_Initialized(PETSC_NULL)) { 2229 2230 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2231 Plapack_nprows = 1; 2232 Plapack_npcols = size; 2233 2234 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2235 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2236 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2237 #if defined(PETSC_USE_DEBUG) 2238 Plapack_ierror = 3; 2239 #else 2240 Plapack_ierror = 0; 2241 #endif 2242 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2243 if (Plapack_ierror){ 2244 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2245 } else { 2246 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2247 } 2248 2249 Plapack_nb_alg = 0; 2250 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2251 if (Plapack_nb_alg) { 2252 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2253 } 2254 PetscOptionsEnd(); 2255 2256 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2257 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2258 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #endif 2264