
/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
EXTERN_C_BEGIN
#include <PLA.h>
EXTERN_C_END

typedef struct {
  PLA_Obj      A,pivots;
  PLA_Template templ;
  MPI_Datatype datatype;
  PetscInt     nb,rstart;
  VecScatter   ctx;
  IS           is_pla,is_petsc;
  PetscBool    pla_solved;
  MatStructure mstruct;
} Mat_Plapack;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the local portion of the operator. For a sequential matrix it
   returns the matrix itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
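
/*
   Usage sketch (illustrative, not part of the original source; M, N and the variable names are
   assumptions): MatDenseGetLocalMatrix() exposes the MATSEQDENSE matrix that stores the locally
   owned rows, so local dense operations can be applied to it directly, e.g.

      Mat A,Aloc;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
      ... set values and assemble A ...
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatScale(Aloc,2.0);CHKERRQ(ierr);      scales only the locally owned rows of A
*/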

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else {
    *a = B;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently only a submatrix with the same partitioning as
     the original matrix is supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
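
/*
   Usage sketch (illustrative, not part of the original source): the multiply routines above first
   gather the entire input vector into the sequential work vector mdn->lvec (via mdn->Mvctx) and
   then apply the local MATSEQDENSE block.  A caller only sees the usual interface:

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);             dispatches to MatMult_MPIDense()
      ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);    dispatches to MatMultTranspose_MPIDense()
*/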

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)mat->spptr;
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free(&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
    ierr = ISDestroy(&lu->is_pla);CHKERRQ(ierr);
    ierr = ISDestroy(&lu->is_petsc);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&lu->ctx);CHKERRQ(ierr);
  }
  ierr = PetscFree(mat->spptr);CHKERRQ(ierr);
#endif

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k=1; k<size; k++) {
          v = vv;
          m = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPILong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPILong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode        ierr;
  PetscMPIInt           size = mdn->size,rank = mdn->rank;
  const PetscViewerType vtype;
  PetscBool             iascii,isdraw;
  PetscViewer           sviewer;
  PetscViewerFormat     format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack           *lu;
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows,Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
      if (mat->factortype){
        lu = (Mat_Plapack*)(mat->spptr);
        ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      }
#else
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... this isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap->rstart; m = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M,&one,(void*)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy F into A->lu->A */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M,&one,lu->A,rstart,0,(void*)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into rhs_pla */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void*)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factortype == MAT_FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows(v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla);

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv(PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
  } else { /* MAT_FACTOR_CHOLESKY */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv(PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
             PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into Petsc vector x */
  PLA_Obj_local_length(v_pla,&loc_m);
  PLA_Obj_local_buffer(v_pla,(void**)&loc_buf);
  PLA_Obj_local_stride(v_pla,&loc_stride);
  /*
  PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
  */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

    /* Create the IS and ctx for the VecScatter */
    PLA_Obj_local_length(v_pla,&loc_m);
    PLA_Obj_local_stride(v_pla,&loc_stride);
    ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(&loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}
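
/*
   Usage sketch (illustrative, not part of the original source): MatSolve_MPIDense() above and the
   PLAPACK factorization routines below are normally reached through PCLU with the PLAPACK solver
   package selected, either from the options database (-pc_type lu
   -pc_factor_mat_solver_package plapack) or in code:

      KSP ksp;
      PC  pc;
      ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
      ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
      ierr = PCFactorSetMatSolverPackage(pc,MATSOLVERPLAPACK);CHKERRQ(ierr);
      ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/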

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free(&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M,&one,(void*)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart    = rstart;
  lu->mstruct   = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M,&one,(void*)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->rstart    = rstart;
  lu->mstruct   = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscBool      issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  lu   = (Mat_Plapack*)F->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERPLAPACK;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_plapack"
PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb,0,&lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->mstruct    = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factortype = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_petsc"
PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
{
#if defined(PETSC_HAVE_PLAPACK)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PLAPACK)
  ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
#else
  SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = sqrt(norms[i]);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*24*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*34*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIDense,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
0, 1560 MatScale_MPIDense, 1561 0, 1562 0, 1563 0, 1564 /*49*/ 0, 1565 0, 1566 0, 1567 0, 1568 0, 1569 /*54*/ 0, 1570 0, 1571 0, 1572 0, 1573 0, 1574 /*59*/ MatGetSubMatrix_MPIDense, 1575 MatDestroy_MPIDense, 1576 MatView_MPIDense, 1577 0, 1578 0, 1579 /*64*/ 0, 1580 0, 1581 0, 1582 0, 1583 0, 1584 /*69*/ 0, 1585 0, 1586 0, 1587 0, 1588 0, 1589 /*74*/ 0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /*79*/ 0, 1595 0, 1596 0, 1597 0, 1598 /*83*/ MatLoad_MPIDense, 1599 0, 1600 0, 1601 0, 1602 0, 1603 0, 1604 /*89*/ 1605 #if defined(PETSC_HAVE_PLAPACK) 1606 MatMatMult_MPIDense_MPIDense, 1607 MatMatMultSymbolic_MPIDense_MPIDense, 1608 MatMatMultNumeric_MPIDense_MPIDense, 1609 #else 1610 0, 1611 0, 1612 0, 1613 #endif 1614 0, 1615 0, 1616 /*94*/ 0, 1617 0, 1618 0, 1619 0, 1620 0, 1621 /*99*/ 0, 1622 0, 1623 0, 1624 MatConjugate_MPIDense, 1625 0, 1626 /*104*/0, 1627 MatRealPart_MPIDense, 1628 MatImaginaryPart_MPIDense, 1629 0, 1630 0, 1631 /*109*/0, 1632 0, 1633 0, 1634 0, 1635 0, 1636 /*114*/0, 1637 0, 1638 0, 1639 0, 1640 0, 1641 /*119*/0, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*124*/0, 1647 MatGetColumnNorms_MPIDense 1648 }; 1649 1650 EXTERN_C_BEGIN 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1653 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1654 { 1655 Mat_MPIDense *a; 1656 PetscErrorCode ierr; 1657 1658 PetscFunctionBegin; 1659 mat->preallocated = PETSC_TRUE; 1660 /* Note: For now, when data is specified above, this assumes the user correctly 1661 allocates the local dense storage space. We should add error checking. */ 1662 1663 a = (Mat_MPIDense*)mat->data; 1664 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1665 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1666 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1667 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1668 a->nvec = mat->cmap->n; 1669 1670 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1671 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1672 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1673 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1674 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 EXTERN_C_END 1678 1679 /*MC 1680 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1681 1682 run ./configure with the option --download-plapack 1683 1684 1685 Options Database Keys: 1686 . -mat_plapack_nprows <n> - number of rows in processor partition 1687 . -mat_plapack_npcols <n> - number of columns in processor partition 1688 . -mat_plapack_nb <n> - block size of template vector 1689 . 
-mat_plapack_nb_alg <n> - algorithmic block size 1690 - -mat_plapack_ckerror <n> - error checking flag 1691 1692 Level: intermediate 1693 1694 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1695 1696 M*/ 1697 1698 EXTERN_C_BEGIN 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatCreate_MPIDense" 1701 PetscErrorCode MatCreate_MPIDense(Mat mat) 1702 { 1703 Mat_MPIDense *a; 1704 PetscErrorCode ierr; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1708 mat->data = (void*)a; 1709 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1710 1711 mat->insertmode = NOT_SET_VALUES; 1712 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1713 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1714 1715 /* build cache for off array entries formed */ 1716 a->donotstash = PETSC_FALSE; 1717 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1718 1719 /* stuff used for matrix vector multiply */ 1720 a->lvec = 0; 1721 a->Mvctx = 0; 1722 a->roworiented = PETSC_TRUE; 1723 1724 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1725 "MatGetDiagonalBlock_MPIDense", 1726 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1727 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1728 "MatMPIDenseSetPreallocation_MPIDense", 1729 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1730 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1731 "MatMatMult_MPIAIJ_MPIDense", 1732 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1733 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1734 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1735 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1736 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1737 "MatMatMultNumeric_MPIAIJ_MPIDense", 1738 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1740 "MatGetFactor_mpidense_petsc", 1741 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1742 #if defined(PETSC_HAVE_PLAPACK) 1743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1744 "MatGetFactor_mpidense_plapack", 1745 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1746 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1747 #endif 1748 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1749 1750 PetscFunctionReturn(0); 1751 } 1752 EXTERN_C_END 1753 1754 /*MC 1755 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1756 1757 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1758 and MATMPIDENSE otherwise. 1759 1760 Options Database Keys: 1761 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1762 1763 Level: beginner 1764 1765 1766 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1767 M*/ 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1771 /*@C 1772 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1773 1774 Not collective 1775 1776 Input Parameters: 1777 . A - the matrix 1778 - data - optional location of matrix data. 
Set data=PETSC_NULL for PETSc
1779    to control all matrix memory allocation.
1780 
1781    Notes:
1782    The dense format is fully compatible with standard Fortran 77
1783    storage by columns.
1784 
1785    The data input variable is intended primarily for Fortran programmers
1786    who wish to allocate their own matrix memory space.  Most users should
1787    set data=PETSC_NULL.
1788 
1789    Level: intermediate
1790 
1791 .keywords: matrix, dense, parallel
1792 
1793 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1794 @*/
1795 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1796 {
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatCreateMPIDense"
1806 /*@C
1807    MatCreateMPIDense - Creates a parallel matrix in dense format.
1808 
1809    Collective on MPI_Comm
1810 
1811    Input Parameters:
1812 +  comm - MPI communicator
1813 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1814 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1815 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1816 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1817 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1818    to control all matrix memory allocation.
1819 
1820    Output Parameter:
1821 .  A - the matrix
1822 
1823    Notes:
1824    The dense format is fully compatible with standard Fortran 77
1825    storage by columns.
1826 
1827    The data input variable is intended primarily for Fortran programmers
1828    who wish to allocate their own matrix memory space.  Most users should
1829    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1830 
1831    The user MUST specify either the local or global matrix dimensions
1832    (possibly both).
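
   A minimal calling sequence (a sketch only; error checking with CHKERRQ() is omitted
   and the global sizes 100 x 50 are purely illustrative):

      Mat A;
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);
      ... fill the locally owned rows with MatSetValues() and assemble with
          MatAssemblyBegin()/MatAssemblyEnd() ...
      MatDestroy(&A);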
1833 1834 Level: intermediate 1835 1836 .keywords: matrix,dense, parallel 1837 1838 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1839 @*/ 1840 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1841 { 1842 PetscErrorCode ierr; 1843 PetscMPIInt size; 1844 1845 PetscFunctionBegin; 1846 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1847 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1848 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1849 if (size > 1) { 1850 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1851 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1854 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatDuplicate_MPIDense" 1861 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1862 { 1863 Mat mat; 1864 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1865 PetscErrorCode ierr; 1866 1867 PetscFunctionBegin; 1868 *newmat = 0; 1869 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1870 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1871 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1872 a = (Mat_MPIDense*)mat->data; 1873 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1874 1875 mat->factortype = A->factortype; 1876 mat->assembled = PETSC_TRUE; 1877 mat->preallocated = PETSC_TRUE; 1878 1879 a->size = oldmat->size; 1880 a->rank = oldmat->rank; 1881 mat->insertmode = NOT_SET_VALUES; 1882 a->nvec = oldmat->nvec; 1883 a->donotstash = oldmat->donotstash; 1884 1885 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1886 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1887 1888 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1889 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1890 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1891 1892 *newmat = mat; 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1898 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1899 { 1900 PetscErrorCode ierr; 1901 PetscMPIInt rank,size; 1902 PetscInt *rowners,i,m,nz,j; 1903 PetscScalar *array,*vals,*vals_ptr; 1904 1905 PetscFunctionBegin; 1906 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1907 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1908 1909 /* determine ownership of all rows */ 1910 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1911 else m = newmat->rmap->n; 1912 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1913 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1914 rowners[0] = 0; 1915 for (i=2; i<=size; i++) { 1916 rowners[i] += rowners[i-1]; 1917 } 1918 1919 if (!sizesset) { 1920 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1921 } 1922 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1923 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1924 1925 if (!rank) { 1926 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1927 1928 /* read in my part of the matrix numerical values */ 1929 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1930 1931 /* insert into matrix-by row (this is why cannot directly read 
into array */ 1932 vals_ptr = vals; 1933 for (i=0; i<m; i++) { 1934 for (j=0; j<N; j++) { 1935 array[i + j*m] = *vals_ptr++; 1936 } 1937 } 1938 1939 /* read in other processors and ship out */ 1940 for (i=1; i<size; i++) { 1941 nz = (rowners[i+1] - rowners[i])*N; 1942 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1943 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1944 } 1945 } else { 1946 /* receive numeric values */ 1947 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1948 1949 /* receive message of values*/ 1950 ierr = MPILong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1951 1952 /* insert into matrix-by row (this is why cannot directly read into array */ 1953 vals_ptr = vals; 1954 for (i=0; i<m; i++) { 1955 for (j=0; j<N; j++) { 1956 array[i + j*m] = *vals_ptr++; 1957 } 1958 } 1959 } 1960 ierr = PetscFree(rowners);CHKERRQ(ierr); 1961 ierr = PetscFree(vals);CHKERRQ(ierr); 1962 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatLoad_MPIDense" 1969 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1970 { 1971 PetscScalar *vals,*svals; 1972 MPI_Comm comm = ((PetscObject)viewer)->comm; 1973 MPI_Status status; 1974 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1975 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1976 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1977 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1978 int fd; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1983 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1984 if (!rank) { 1985 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1986 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1987 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1988 } 1989 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1990 1991 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1992 M = header[1]; N = header[2]; nz = header[3]; 1993 1994 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1995 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1996 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1997 1998 /* If global sizes are set, check if they are consistent with that given in the file */ 1999 if (sizesset) { 2000 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2001 } 2002 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2003 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2004 2005 /* 2006 Handle case where matrix is stored on disk as a dense matrix 2007 */ 2008 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 2009 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 2010 PetscFunctionReturn(0); 2011 } 2012 2013 /* determine ownership of all rows */ 2014 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 2015 else m = 
PetscMPIIntCast(newmat->rmap->n); 2016 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2017 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2018 rowners[0] = 0; 2019 for (i=2; i<=size; i++) { 2020 rowners[i] += rowners[i-1]; 2021 } 2022 rstart = rowners[rank]; 2023 rend = rowners[rank+1]; 2024 2025 /* distribute row lengths to all processors */ 2026 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2027 if (!rank) { 2028 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2029 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2030 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2031 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2032 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2033 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2034 } else { 2035 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2036 } 2037 2038 if (!rank) { 2039 /* calculate the number of nonzeros on each processor */ 2040 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2041 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2042 for (i=0; i<size; i++) { 2043 for (j=rowners[i]; j< rowners[i+1]; j++) { 2044 procsnz[i] += rowlengths[j]; 2045 } 2046 } 2047 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2048 2049 /* determine max buffer needed and allocate it */ 2050 maxnz = 0; 2051 for (i=0; i<size; i++) { 2052 maxnz = PetscMax(maxnz,procsnz[i]); 2053 } 2054 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2055 2056 /* read in my part of the matrix column indices */ 2057 nz = procsnz[0]; 2058 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2059 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2060 2061 /* read in every one elses and ship off */ 2062 for (i=1; i<size; i++) { 2063 nz = procsnz[i]; 2064 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2065 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2066 } 2067 ierr = PetscFree(cols);CHKERRQ(ierr); 2068 } else { 2069 /* determine buffer space needed for message */ 2070 nz = 0; 2071 for (i=0; i<m; i++) { 2072 nz += ourlens[i]; 2073 } 2074 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2075 2076 /* receive message of column indices*/ 2077 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2078 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2079 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2080 } 2081 2082 /* loop over local rows, determining number of off diagonal entries */ 2083 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2084 jj = 0; 2085 for (i=0; i<m; i++) { 2086 for (j=0; j<ourlens[i]; j++) { 2087 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2088 jj++; 2089 } 2090 } 2091 2092 /* create our matrix */ 2093 for (i=0; i<m; i++) { 2094 ourlens[i] -= offlens[i]; 2095 } 2096 2097 if (!sizesset) { 2098 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2099 } 2100 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2101 for (i=0; i<m; i++) { 2102 ourlens[i] += offlens[i]; 2103 } 2104 2105 if (!rank) { 2106 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2107 2108 /* read in my part of the matrix numerical values */ 2109 nz = procsnz[0]; 
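    /* vals was allocated with length maxnz above, so the same buffer holds rank 0's
       block here and is then reused for every other rank's block in the loop below */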
2110 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2111 2112 /* insert into matrix */ 2113 jj = rstart; 2114 smycols = mycols; 2115 svals = vals; 2116 for (i=0; i<m; i++) { 2117 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2118 smycols += ourlens[i]; 2119 svals += ourlens[i]; 2120 jj++; 2121 } 2122 2123 /* read in other processors and ship out */ 2124 for (i=1; i<size; i++) { 2125 nz = procsnz[i]; 2126 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2127 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2128 } 2129 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2130 } else { 2131 /* receive numeric values */ 2132 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2133 2134 /* receive message of values*/ 2135 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2136 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2137 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2138 2139 /* insert into matrix */ 2140 jj = rstart; 2141 smycols = mycols; 2142 svals = vals; 2143 for (i=0; i<m; i++) { 2144 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2145 smycols += ourlens[i]; 2146 svals += ourlens[i]; 2147 jj++; 2148 } 2149 } 2150 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2151 ierr = PetscFree(vals);CHKERRQ(ierr); 2152 ierr = PetscFree(mycols);CHKERRQ(ierr); 2153 ierr = PetscFree(rowners);CHKERRQ(ierr); 2154 2155 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatEqual_MPIDense" 2162 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2163 { 2164 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2165 Mat a,b; 2166 PetscBool flg; 2167 PetscErrorCode ierr; 2168 2169 PetscFunctionBegin; 2170 a = matA->A; 2171 b = matB->A; 2172 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2173 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #if defined(PETSC_HAVE_PLAPACK) 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2181 /*@C 2182 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2183 Level: developer 2184 2185 .keywords: Petsc, destroy, package, PLAPACK 2186 .seealso: PetscFinalize() 2187 @*/ 2188 PetscErrorCode PetscPLAPACKFinalizePackage(void) 2189 { 2190 PetscErrorCode ierr; 2191 2192 PetscFunctionBegin; 2193 ierr = PLA_Finalize();CHKERRQ(ierr); 2194 PetscFunctionReturn(0); 2195 } 2196 2197 #undef __FUNCT__ 2198 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2199 /*@C 2200 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2201 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2202 2203 Input Parameter: 2204 . comm - the communicator the matrix lives on 2205 2206 Level: developer 2207 2208 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2209 same communicator (because there is some global state that is initialized and used for all matrices). 
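   As an illustration (the numbers are only an example), running on 8 MPI processes with the options
   -mat_plapack_nprows 2 -mat_plapack_npcols 4 selects a 2 x 4 process mesh for PLAPACK; by default a
   1 x (communicator size) mesh is used.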
In addition, if PLAPACK is initialized
2210    on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created on subcommunicators), these subcommunicators
2211    cannot overlap.
2212 
2213 .keywords: Petsc, initialize, package, PLAPACK
2214 .seealso: PetscSysInitializePackage(), PetscInitialize()
2215 @*/
2216 PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm)
2217 {
2218   PetscMPIInt    size;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   if (!PLA_Initialized(PETSC_NULL)) {
2223 
2224     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2225     Plapack_nprows = 1;
2226     Plapack_npcols = size;
2227 
2228     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2229     ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2230     ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2231 #if defined(PETSC_USE_DEBUG)
2232     Plapack_ierror = 3;
2233 #else
2234     Plapack_ierror = 0;
2235 #endif
2236     ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2237     if (Plapack_ierror) {
2238       ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2239     } else {
2240       ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2241     }
2242 
2243     Plapack_nb_alg = 0;
2244     ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2245     if (Plapack_nb_alg) {
2246       ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2247     }
2248     PetscOptionsEnd();
2249 
2250     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2251     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2252     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #endif
2258 
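
/*
   Usage sketch (not part of the library): loading an MPIDENSE matrix from a PETSc binary
   file with the MatLoad() implementation above.  The file name "matrix.dat" and the
   variable names are illustrative only; error codes should be checked with CHKERRQ().

      Mat         A;
      PetscViewer viewer;

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetType(A,MATMPIDENSE);
      MatLoad(A,viewer);
      PetscViewerDestroy(&viewer);
      ...
      MatDestroy(&A);
*/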