1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact) 12 { 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 17 PetscFunctionReturn(0); 18 } 19 20 #undef __FUNCT__ 21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 28 PetscFunctionReturn(0); 29 } 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "MatDenseGetLocalMatrix" 33 /*@ 34 35 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 36 matrix that represents the operator. For sequential matrices it returns itself. 37 38 Input Parameter: 39 . A - the Seq or MPI dense matrix 40 41 Output Parameter: 42 . B - the inner matrix 43 44 Level: intermediate 45 46 @*/ 47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 48 { 49 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 50 PetscErrorCode ierr; 51 PetscTruth flg; 52 PetscMPIInt size; 53 54 PetscFunctionBegin; 55 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 56 if (!flg) { /* this check sucks! 
*/ 57 ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr); 58 if (flg) { 59 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 60 if (size == 1) flg = PETSC_FALSE; 61 } 62 } 63 if (flg) { 64 *B = mat->A; 65 } else { 66 *B = A; 67 } 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "MatGetRow_MPIDense" 73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 74 { 75 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 76 PetscErrorCode ierr; 77 PetscInt lrow,rstart = mat->rstart,rend = mat->rend; 78 79 PetscFunctionBegin; 80 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 81 lrow = row - rstart; 82 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatRestoreRow_MPIDense" 88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 94 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 95 PetscFunctionReturn(0); 96 } 97 98 EXTERN_C_BEGIN 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 102 { 103 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 104 PetscErrorCode ierr; 105 PetscInt m = A->m,rstart = mdn->rstart; 106 PetscScalar *array; 107 MPI_Comm comm; 108 109 PetscFunctionBegin; 110 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 111 112 /* The reuse aspect is not implemented efficiently */ 113 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 114 115 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 116 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 117 ierr = MatCreate(comm,B);CHKERRQ(ierr); 118 ierr = 
MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 119 ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr); 120 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 121 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 122 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 123 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 124 125 *iscopy = PETSC_TRUE; 126 PetscFunctionReturn(0); 127 } 128 EXTERN_C_END 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatSetValues_MPIDense" 132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 133 { 134 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 135 PetscErrorCode ierr; 136 PetscInt i,j,rstart = A->rstart,rend = A->rend,row; 137 PetscTruth roworiented = A->roworiented; 138 139 PetscFunctionBegin; 140 for (i=0; i<m; i++) { 141 if (idxm[i] < 0) continue; 142 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 if (roworiented) { 146 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 147 } else { 148 for (j=0; j<n; j++) { 149 if (idxn[j] < 0) continue; 150 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 151 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 152 } 153 } 154 } else { 155 if (!A->donotstash) { 156 if (roworiented) { 157 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 158 } else { 159 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 160 } 161 } 162 } 163 } 164 PetscFunctionReturn(0); 165 } 166 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatGetValues_MPIDense" 169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 170 { 171 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 172 PetscErrorCode 
ierr; 173 PetscInt i,j,rstart = mdn->rstart,rend = mdn->rend,row; 174 175 PetscFunctionBegin; 176 for (i=0; i<m; i++) { 177 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 178 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 179 if (idxm[i] >= rstart && idxm[i] < rend) { 180 row = idxm[i] - rstart; 181 for (j=0; j<n; j++) { 182 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 183 if (idxn[j] >= mat->N) { 184 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 185 } 186 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 187 } 188 } else { 189 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 190 } 191 } 192 PetscFunctionReturn(0); 193 } 194 195 #undef __FUNCT__ 196 #define __FUNCT__ "MatGetArray_MPIDense" 197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 198 { 199 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 200 PetscErrorCode ierr; 201 202 PetscFunctionBegin; 203 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 210 { 211 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 212 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 213 PetscErrorCode ierr; 214 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 215 PetscScalar *av,*bv,*v = lmat->v; 216 Mat newmat; 217 218 PetscFunctionBegin; 219 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 220 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 221 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 222 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 223 224 /* No parallel redistribution currently supported! Should really check each index set 225 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 226 original matrix! 
*/ 227 228 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 229 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 230 231 /* Check submatrix call */ 232 if (scall == MAT_REUSE_MATRIX) { 233 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 234 /* Really need to test rows and column sizes! */ 235 newmat = *B; 236 } else { 237 /* Create and fill new matrix */ 238 ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr); 239 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 240 ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr); 241 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 242 } 243 244 /* Now extract the data pointers and do the copy, column at a time */ 245 newmatd = (Mat_MPIDense*)newmat->data; 246 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 247 248 for (i=0; i<ncols; i++) { 249 av = v + nlrows*icol[i]; 250 for (j=0; j<nrows; j++) { 251 *bv++ = av[irow[j] - rstart]; 252 } 253 } 254 255 /* Assemble the matrices so that the correct flags are set */ 256 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 257 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 258 259 /* Free work space */ 260 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 261 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 262 *B = newmat; 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreArray_MPIDense" 268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 269 { 270 PetscFunctionBegin; 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 277 { 278 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 279 MPI_Comm comm = mat->comm; 280 PetscErrorCode ierr; 281 PetscInt nstash,reallocs; 282 InsertMode addv; 283 284 PetscFunctionBegin; 285 /* make sure all processors are either in INSERTMODE or ADDMODE */ 286 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 287 if (addv == (ADD_VALUES|INSERT_VALUES)) { 288 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 289 } 290 mat->insertmode = addv; /* in case this processor had no cache */ 291 292 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 293 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 294 ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 295 PetscFunctionReturn(0); 296 } 297 298 #undef __FUNCT__ 299 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 301 { 302 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 303 PetscErrorCode ierr; 304 PetscInt i,*row,*col,flg,j,rstart,ncols; 305 PetscMPIInt n; 306 PetscScalar *val; 307 InsertMode addv=mat->insertmode; 308 309 PetscFunctionBegin; 310 /* wait on receives */ 311 while (1) { 312 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 313 if (!flg) break; 314 315 for (i=0; i<n;) { 316 /* Now identify the consecutive vals belonging to the same row */ 317 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 318 if (j < n) ncols = j-i; 319 else ncols = n-i; 320 /* Now assemble all these values with a single function call */ 321 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 322 i = j; 323 } 324 } 325 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 326 327 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 328 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 329 330 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 331 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 332 } 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatZeroEntries_MPIDense" 338 PetscErrorCode 
MatZeroEntries_MPIDense(Mat A) 339 { 340 PetscErrorCode ierr; 341 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 342 343 PetscFunctionBegin; 344 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 /* the code does not do the diagonal entries correctly unless the 349 matrix is square and the column and row owerships are identical. 350 This is a BUG. The only way to fix it seems to be to access 351 mdn->A and mdn->B directly and not through the MatZeroRows() 352 routine. 353 */ 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatZeroRows_MPIDense" 356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 357 { 358 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 359 PetscErrorCode ierr; 360 PetscInt i,*owners = l->rowners; 361 PetscInt *nprocs,j,idx,nsends; 362 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 363 PetscInt *rvalues,tag = A->tag,count,base,slen,*source; 364 PetscInt *lens,*lrows,*values; 365 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 366 MPI_Comm comm = A->comm; 367 MPI_Request *send_waits,*recv_waits; 368 MPI_Status recv_status,*send_status; 369 PetscTruth found; 370 371 PetscFunctionBegin; 372 /* first count number of contributors to each processor */ 373 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 374 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 375 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 376 for (i=0; i<N; i++) { 377 idx = rows[i]; 378 found = PETSC_FALSE; 379 for (j=0; j<size; j++) { 380 if (idx >= owners[j] && idx < owners[j+1]) { 381 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 382 } 383 } 384 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 385 } 386 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 387 388 /* inform other processors of number of messages and max length*/ 389 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 390 
391 /* post receives: */ 392 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 393 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 394 for (i=0; i<nrecvs; i++) { 395 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 396 } 397 398 /* do sends: 399 1) starts[i] gives the starting index in svalues for stuff going to 400 the ith processor 401 */ 402 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 403 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 404 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 405 starts[0] = 0; 406 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 407 for (i=0; i<N; i++) { 408 svalues[starts[owner[i]]++] = rows[i]; 409 } 410 411 starts[0] = 0; 412 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 413 count = 0; 414 for (i=0; i<size; i++) { 415 if (nprocs[2*i+1]) { 416 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 417 } 418 } 419 ierr = PetscFree(starts);CHKERRQ(ierr); 420 421 base = owners[rank]; 422 423 /* wait on receives */ 424 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 425 source = lens + nrecvs; 426 count = nrecvs; slen = 0; 427 while (count) { 428 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 429 /* unpack receives into our local space */ 430 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 431 source[imdex] = recv_status.MPI_SOURCE; 432 lens[imdex] = n; 433 slen += n; 434 count--; 435 } 436 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 437 438 /* move the data into the send scatter */ 439 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 440 count = 0; 441 for (i=0; i<nrecvs; i++) { 442 values = rvalues + i*nmax; 443 for (j=0; j<lens[i]; j++) { 444 lrows[count++] = values[j] - base; 445 } 
446 } 447 ierr = PetscFree(rvalues);CHKERRQ(ierr); 448 ierr = PetscFree(lens);CHKERRQ(ierr); 449 ierr = PetscFree(owner);CHKERRQ(ierr); 450 ierr = PetscFree(nprocs);CHKERRQ(ierr); 451 452 /* actually zap the local rows */ 453 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 454 ierr = PetscFree(lrows);CHKERRQ(ierr); 455 456 /* wait on sends */ 457 if (nsends) { 458 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 459 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 460 ierr = PetscFree(send_status);CHKERRQ(ierr); 461 } 462 ierr = PetscFree(send_waits);CHKERRQ(ierr); 463 ierr = PetscFree(svalues);CHKERRQ(ierr); 464 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatMult_MPIDense" 470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 471 { 472 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 473 PetscErrorCode ierr; 474 475 PetscFunctionBegin; 476 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 477 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 478 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatMultAdd_MPIDense" 484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 491 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 492 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatMultTranspose_MPIDense" 498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 499 { 500 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 501 PetscErrorCode ierr; 502 
PetscScalar zero = 0.0; 503 504 PetscFunctionBegin; 505 ierr = VecSet(yy,zero);CHKERRQ(ierr); 506 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 507 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 508 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 PetscErrorCode ierr; 518 519 PetscFunctionBegin; 520 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 521 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 522 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 523 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatGetDiagonal_MPIDense" 529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 532 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 533 PetscErrorCode ierr; 534 PetscInt len,i,n,m = A->m,radd; 535 PetscScalar *x,zero = 0.0; 536 537 PetscFunctionBegin; 538 ierr = VecSet(v,zero);CHKERRQ(ierr); 539 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 540 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 541 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 542 len = PetscMin(a->A->m,a->A->n); 543 radd = a->rstart*m; 544 for (i=0; i<len; i++) { 545 x[i] = aloc->v[radd + i*m + i]; 546 } 547 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatDestroy_MPIDense" 553 PetscErrorCode MatDestroy_MPIDense(Mat mat) 554 { 555 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 556 PetscErrorCode ierr; 557 558 PetscFunctionBegin; 559 560 #if defined(PETSC_USE_LOG) 561 
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 562 #endif 563 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 564 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 565 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 566 if (mdn->lvec) VecDestroy(mdn->lvec); 567 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 568 if (mdn->factor) { 569 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 570 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 571 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 572 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 573 } 574 ierr = PetscFree(mdn);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatView_MPIDense_Binary" 582 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 583 { 584 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 585 PetscErrorCode ierr; 586 587 PetscFunctionBegin; 588 if (mdn->size == 1) { 589 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 590 } 591 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 597 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 598 { 599 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 600 PetscErrorCode ierr; 601 PetscMPIInt size = mdn->size,rank = mdn->rank; 602 PetscViewerType vtype; 603 PetscTruth iascii,isdraw; 604 PetscViewer sviewer; 605 PetscViewerFormat format; 606 607 PetscFunctionBegin; 608 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 609 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 610 if (iascii) { 611 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 612 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 613 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 614 MatInfo info; 615 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m, 617 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 618 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 619 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } else if (format == PETSC_VIEWER_ASCII_INFO) { 622 PetscFunctionReturn(0); 623 } 624 } else if (isdraw) { 625 PetscDraw draw; 626 PetscTruth isnull; 627 628 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 629 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 630 if (isnull) PetscFunctionReturn(0); 631 } 632 633 if (size == 1) { 634 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 635 } else { 636 /* assemble the entire matrix onto first processor. */ 637 Mat A; 638 PetscInt M = mat->M,N = mat->N,m,row,i,nz; 639 PetscInt *cols; 640 PetscScalar *vals; 641 642 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 643 if (!rank) { 644 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 645 } else { 646 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 647 } 648 /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */ 649 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 650 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 651 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 652 653 /* Copy the matrix ... 
This isn't the most efficient means, 654 but it's quick for now */ 655 A->insertmode = INSERT_VALUES; 656 row = mdn->rstart; m = mdn->A->m; 657 for (i=0; i<m; i++) { 658 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 659 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 660 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 661 row++; 662 } 663 664 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 666 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 667 if (!rank) { 668 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 669 } 670 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 671 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 672 ierr = MatDestroy(A);CHKERRQ(ierr); 673 } 674 PetscFunctionReturn(0); 675 } 676 677 #undef __FUNCT__ 678 #define __FUNCT__ "MatView_MPIDense" 679 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 680 { 681 PetscErrorCode ierr; 682 PetscTruth iascii,isbinary,isdraw,issocket; 683 684 PetscFunctionBegin; 685 686 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 687 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 688 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 689 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 690 691 if (iascii || issocket || isdraw) { 692 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 693 } else if (isbinary) { 694 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 695 } else { 696 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatGetInfo_MPIDense" 703 PetscErrorCode 
MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 704 { 705 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 706 Mat mdn = mat->A; 707 PetscErrorCode ierr; 708 PetscReal isend[5],irecv[5]; 709 710 PetscFunctionBegin; 711 info->rows_global = (double)A->M; 712 info->columns_global = (double)A->N; 713 info->rows_local = (double)A->m; 714 info->columns_local = (double)A->N; 715 info->block_size = 1.0; 716 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 717 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 718 isend[3] = info->memory; isend[4] = info->mallocs; 719 if (flag == MAT_LOCAL) { 720 info->nz_used = isend[0]; 721 info->nz_allocated = isend[1]; 722 info->nz_unneeded = isend[2]; 723 info->memory = isend[3]; 724 info->mallocs = isend[4]; 725 } else if (flag == MAT_GLOBAL_MAX) { 726 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 727 info->nz_used = irecv[0]; 728 info->nz_allocated = irecv[1]; 729 info->nz_unneeded = irecv[2]; 730 info->memory = irecv[3]; 731 info->mallocs = irecv[4]; 732 } else if (flag == MAT_GLOBAL_SUM) { 733 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 734 info->nz_used = irecv[0]; 735 info->nz_allocated = irecv[1]; 736 info->nz_unneeded = irecv[2]; 737 info->memory = irecv[3]; 738 info->mallocs = irecv[4]; 739 } 740 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 741 info->fill_ratio_needed = 0; 742 info->factor_mallocs = 0; 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatSetOption_MPIDense" 748 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op) 749 { 750 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 switch (op) { 755 case MAT_NO_NEW_NONZERO_LOCATIONS: 756 case MAT_YES_NEW_NONZERO_LOCATIONS: 757 case MAT_NEW_NONZERO_LOCATION_ERR: 758 case MAT_NEW_NONZERO_ALLOCATION_ERR: 759 case MAT_COLUMNS_SORTED: 760 case MAT_COLUMNS_UNSORTED: 
761 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 762 break; 763 case MAT_ROW_ORIENTED: 764 a->roworiented = PETSC_TRUE; 765 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 766 break; 767 case MAT_ROWS_SORTED: 768 case MAT_ROWS_UNSORTED: 769 case MAT_YES_NEW_DIAGONALS: 770 case MAT_USE_HASH_TABLE: 771 ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr); 772 break; 773 case MAT_COLUMN_ORIENTED: 774 a->roworiented = PETSC_FALSE; 775 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 776 break; 777 case MAT_IGNORE_OFF_PROC_ENTRIES: 778 a->donotstash = PETSC_TRUE; 779 break; 780 case MAT_NO_NEW_DIAGONALS: 781 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 782 case MAT_SYMMETRIC: 783 case MAT_STRUCTURALLY_SYMMETRIC: 784 case MAT_NOT_SYMMETRIC: 785 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 786 case MAT_HERMITIAN: 787 case MAT_NOT_HERMITIAN: 788 case MAT_SYMMETRY_ETERNAL: 789 case MAT_NOT_SYMMETRY_ETERNAL: 790 break; 791 default: 792 SETERRQ(PETSC_ERR_SUP,"unknown option"); 793 } 794 PetscFunctionReturn(0); 795 } 796 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatDiagonalScale_MPIDense" 800 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 801 { 802 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 803 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 804 PetscScalar *l,*r,x,*v; 805 PetscErrorCode ierr; 806 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 807 808 PetscFunctionBegin; 809 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 810 if (ll) { 811 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 812 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 813 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 814 for (i=0; i<m; i++) { 815 x = l[i]; 816 v = mat->v + i; 817 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 818 } 819 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 820 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 821 } 822 if (rr) { 823 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 824 if (s3a != s3) 
SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 825 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 826 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 827 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 828 for (i=0; i<n; i++) { 829 x = r[i]; 830 v = mat->v + i*m; 831 for (j=0; j<m; j++) { (*v++) *= x;} 832 } 833 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 834 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "MatNorm_MPIDense" 841 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 842 { 843 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 844 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 845 PetscErrorCode ierr; 846 PetscInt i,j; 847 PetscReal sum = 0.0; 848 PetscScalar *v = mat->v; 849 850 PetscFunctionBegin; 851 if (mdn->size == 1) { 852 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 853 } else { 854 if (type == NORM_FROBENIUS) { 855 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 856 #if defined(PETSC_USE_COMPLEX) 857 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 858 #else 859 sum += (*v)*(*v); v++; 860 #endif 861 } 862 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 863 *nrm = sqrt(*nrm); 864 ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr); 865 } else if (type == NORM_1) { 866 PetscReal *tmp,*tmp2; 867 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 868 tmp2 = tmp + A->N; 869 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 870 *nrm = 0.0; 871 v = mat->v; 872 for (j=0; j<mdn->A->n; j++) { 873 for (i=0; i<mdn->A->m; i++) { 874 tmp[j] += PetscAbsScalar(*v); v++; 875 } 876 } 877 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 878 for (j=0; j<A->N; j++) { 879 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 880 } 881 ierr = PetscFree(tmp);CHKERRQ(ierr); 
882 ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr); 883 } else if (type == NORM_INFINITY) { /* max row norm */ 884 PetscReal ntemp; 885 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 886 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 887 } else { 888 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 889 } 890 } 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatTranspose_MPIDense" 896 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout) 897 { 898 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 899 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 900 Mat B; 901 PetscInt M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 902 PetscErrorCode ierr; 903 PetscInt j,i; 904 PetscScalar *v; 905 906 PetscFunctionBegin; 907 if (!matout && M != N) { 908 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 909 } 910 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 911 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 912 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 913 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 914 915 m = a->A->m; n = a->A->n; v = Aloc->v; 916 ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 917 for (j=0; j<n; j++) { 918 for (i=0; i<m; i++) rwork[i] = rstart + i; 919 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 920 v += m; 921 } 922 ierr = PetscFree(rwork);CHKERRQ(ierr); 923 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 924 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 925 if (matout) { 926 *matout = B; 927 } else { 928 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 #include "petscblaslapack.h" 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatScale_MPIDense" 936 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 937 { 938 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 939 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 940 PetscScalar oalpha = alpha; 941 
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the local storage is one contiguous (local rows) x (global columns)
     array, so a single scal covers all locally owned entries */
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* forward declaration: the ops table below references it before its definition */
static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
/*
   MatSetUpPreallocation_MPIDense - Default preallocation hook: lets PETSc
   allocate the local dense storage (no user-supplied array).
*/
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Operation table installed on every MPIDENSE matrix; a 0 entry means the
   operation is not supported for this type.  The numeric comments mark the
   slot indices within struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       0,
       MatCholeskyFactorSymbolic_MPIDense,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
/*
   MatMPIDenseSetPreallocation_MPIDense - Type-specific preallocation:
   creates the inner sequential dense matrix of size (local rows) x
   (global columns) that holds this process's share of the matrix.
   data, if non-NULL, is user-supplied column-major storage for it.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Constructor for the MPIDENSE type: installs the ops
   table, determines the parallel ownership ranges, and composes the
   type-specific methods.  The local dense storage itself is allocated later
   by MatMPIDenseSetPreallocation().
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor = 0;
  mat->mapping = 0;

  a->factor = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  /* resolve any PETSC_DECIDE local/global sizes consistently across the communicator */
  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners and cowners
     share a single allocation, cowners starting size+1 entries in */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  /* gather each process's local row count, then prefix-sum into starting offsets */
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  /* same gather + prefix-sum for the column ownership */
  ierr = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
.
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
/*
   MatCreate_Dense - Constructor for the generic DENSE type: delegates to
   MATSEQDENSE on a one-process communicator and MATMPIDENSE otherwise,
   while keeping the user-visible type name "dense".
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  mat  - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix type composed
     one; silently does nothing for types without the hook */
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a sparse parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* choose the sequential or parallel variant based on the communicator size */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
/*
   MatDuplicate_MPIDense - Duplicates a parallel dense matrix, copying the
   parallel layout of the original and, depending on cpvalues, the numerical
   values of the inner sequential matrix.
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  /* copy the parallel layout of the original matrix */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  /* NOTE(review): MatSetUpMultiply_MPIDense runs before a->A is created by
     the MatDuplicate below -- confirm it does not touch the inner matrix */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/*
   MatLoad_MPIDense_DenseInFile - Loads a matrix stored on disk in dense
   format.  Process 0 reads the file and ships each process its contiguous
   block of rows; every process then copies the row-major file data into its
   column-major local array.
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows: gather per-process row counts, then
     prefix-sum into starting offsets */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    /* rank 0 owns the largest row block, so this buffer is big enough to
       stage every other process's values too */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
/*
   MatLoad_MPIDense - Loads a matrix from a PETSc binary viewer and assembles
   it as a parallel dense matrix.  Process 0 reads the header, row lengths,
   column indices and values, and ships each process its share; the file may
   hold either a sparse (AIJ-style) or a dense on-disk representation.

   NOTE(review): fd is initialized only on rank 0; on other ranks an
   indeterminate value is passed to MatLoad_MPIDense_DenseInFile (which in
   turn only uses fd on rank 0) -- confirm this is intentional.
*/
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file; header = cookie, M, N, nz */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share one allocation */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  /* restore ourlens to the full per-row counts used when inserting values below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}