#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that stores the local (on-process) part of the operator; for a sequential dense
   matrix it returns the matrix itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) { /* this type check is clumsy because MATDENSE resolves to the sequential type on one process */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
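/*
   Minimal usage sketch for MatDenseGetLocalMatrix() (not part of the library source; the
   100 x 100 size and the use of MatGetOwnershipRange() are illustrative assumptions).
   Each process retrieves the sequential matrix that holds its local rows and can then
   work on it with the usual sequential-matrix routines.

     Mat      A,Aloc;
     PetscInt rstart,rend;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     global row rstart+i of A corresponds to local row i of the MATSEQDENSE matrix Aloc
*/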
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
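/*
   Illustrative sketch (not part of the library source) of how the value array passed to
   MatSetValues() is interpreted by the routine above.  With the default MAT_ROW_ORIENTED
   option the block v[] is read row by row (v+i*n); after MatSetOption(X,MAT_COLUMN_ORIENTED)
   it is read column by column (v+i+j*m).  The matrix X, the indices, and the 2x2 block
   values are made-up assumptions; X is assumed already created and preallocated.

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vrow[4] = {1.0,2.0,3.0,4.0};   row oriented layout of the block [1 2; 3 4]
     PetscScalar vcol[4] = {1.0,3.0,2.0,4.0};   column oriented layout of the same block

     ierr = MatSetValues(X,2,rows,2,cols,vrow,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatSetOption(X,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
     ierr = MatSetValues(X,2,rows,2,cols,vcol,INSERT_VALUES);CHKERRQ(ierr);
*/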
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported; we should really check each index set
     to confirm that it is OK.  The submatrix must currently use the same row partitioning as
     the original matrix. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
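/*
   Sketch (not part of the library source) of a MatGetSubMatrix() call that satisfies the
   restriction noted above: the row index set keeps each process's own rows, so no
   redistribution is needed.  The matrix A, the column count ncols, and the use of
   PETSC_DECIDE for the local column size are assumptions about the calling code.

     IS       isrow,iscol;
     Mat      Asub;
     PetscInt rstart,rend,ncols = 10;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(isrow);CHKERRQ(ierr);
     ierr = ISDestroy(iscol);CHKERRQ(ierr);
*/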
#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
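/*
   Minimal usage sketch for MatZeroRows() on a parallel dense matrix (not part of the
   library source; the matrix A and the particular rows chosen are assumptions, and the
   array-based calling sequence used in this file is assumed).  Because the routine above
   ships each global row index to its owning process, any process may list any global row;
   here rows 0 and 1 are zeroed and their diagonal entries set to 1.0.

     PetscInt    rows[2] = {0,1};
     PetscScalar diag    = 1.0;

     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/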
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
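/*
   Usage sketch (not part of the library source) for the multiply routines above.  The
   whole input vector is first gathered into the local work vector lvec, then the local
   m x N block is applied.  The use of MatGetVecs() is an assumption about the calling
   code; it gives x and y layouts compatible with the matrix (x follows the column layout,
   y follows the row layout).

     Vec x,y;

     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);
*/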
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix row by row; this isn't the most efficient way, but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose is only supported for square matrices");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       0,
       MatCholeskyFactorSymbolic_MPIDense,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
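/*
   Sketch (not part of the library source) of creating a distributed dense matrix of this
   type directly; the 100 x 100 global size is an arbitrary assumption.  Calling
   MatSetFromOptions() instead of MatSetType() would let -mat_type mpidense select the
   type at run time, as the options key above describes.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/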
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr            = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data       = (void*)a;
  ierr            = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor     = 0;
  mat->mapping    = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapInitialize(mat->comm,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(mat->comm,&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
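/*
   Sketch (not part of the library source): because MATDENSE is resolved by the routine
   above, the same calling code yields a MATSEQDENSE matrix on one process and a
   MATMPIDENSE matrix on several, so applications need not distinguish the two.  The
   50 x 50 size below is an arbitrary assumption.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,50,50);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
*/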
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
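/*
   Sketch (not part of the library source) of supplying the local storage yourself; the
   local sizes are assumptions.  As the Notes above say, the array must hold the local
   rows in column-major (Fortran) order, and for this matrix type it must have at least
   (local rows) x (global columns) entries, matching the sizes passed to the inner
   sequential matrix in MatMPIDenseSetPreallocation_MPIDense() above.

     Mat         A;
     PetscInt    mlocal = 4,Nglobal = 20;
     PetscScalar *mydata;

     ierr = PetscMalloc(mlocal*Nglobal*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mlocal,PETSC_DECIDE,PETSC_DECIDE,Nglobal);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/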
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
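/*
   Minimal usage sketch (not part of the library source) of MatCreateMPIDense(); the
   100 x 100 global size is an arbitrary assumption and PETSc chooses the row
   distribution.  Passing PETSC_NULL for data lets PETSc allocate the storage.  Each
   process sets the diagonal entry of its first local row, then the matrix is assembled.

     Mat         A;
     PetscInt    rstart,rend;
     PetscScalar one = 1.0;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&rstart,1,&rstart,&one,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/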
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart  = A->rmap.rstart;
  mat->rmap.rend    = A->rmap.rend;
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->rmap.range);CHKERRQ(ierr);
  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->cmap.range);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly into the column-major array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix row by row (this is why we cannot read directly into the column-major array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
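/*
   Usage sketch (not part of the library source) for loading a matrix of this type from a
   PETSc binary file; the file name is an arbitrary assumption.  With the MatLoad()
   calling sequence used in this file, the desired matrix type is passed explicitly, and
   files written in the dense binary format are dispatched to the routine above.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/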
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }
  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}