#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info_dummy,Mat *fact)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"No support for numeric factorization of the MATMPIDENSE matrix type");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info_dummy,Mat *fact)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"No support for numeric factorization of the MATMPIDENSE matrix type");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that stores this process's portion of the operator; for a sequential dense matrix
   it returns the matrix itself.

   Input Parameter:
.  A - the sequential or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) { /* this check is fragile; MATDENSE may resolve to either implementation */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
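/*
   A minimal usage sketch (illustrative only, not part of the original source):
   extract the local block of a parallel dense matrix and query its dimensions.

     Mat      A,Aloc;
     PetscInt m,n;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetLocalSize(Aloc,&m,&n);CHKERRQ(ierr);   Aloc is the MATSEQDENSE holding the local rows

   Note that Aloc is a reference to the internal sequential matrix, not a copy, so it
   must not be destroyed by the caller.
*/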
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->m,rstart = mdn->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check each
     index set to confirm that it is OK; currently the submatrix must have the same
     row partitioning as the original matrix. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test the row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->m,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"Unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes that the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
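/*
   A minimal sketch (illustrative only, not part of the original source) of selecting
   this type at runtime; M and N are assumed to be previously defined global dimensions.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   then run with -mat_type mpidense
*/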
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-processor array entries */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/
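/*
   A minimal sketch (illustrative only, not part of the original source): requesting
   MATDENSE directly, which resolves to MATSEQDENSE or MATMPIDENSE depending on the
   size of the communicator; M and N are assumed to be previously defined.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
*/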
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
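/*
   A minimal sketch (illustrative only, not part of the original source): letting PETSc
   allocate the storage; m, n, M, N are assumed to be previously defined sizes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);   PETSc allocates the m x N local block

   If a non-null data pointer is passed instead, it must point to a column-major array
   large enough for the local rows, and the caller retains ownership of that memory.
*/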
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR
          for Fortran users) for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
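/*
   A minimal sketch (illustrative only, not part of the original source): a 100 x 100
   parallel dense matrix with a PETSc-chosen row distribution and PETSc-managed storage.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/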
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts belonging to the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A    = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts belonging to the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}