#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the sequential or parallel (MPI) dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) { /* this check is fragile; a cleaner type test would be welcome */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
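/*
   Usage sketch for the routine above (a minimal, hypothetical fragment; it assumes
   "A" is an assembled dense matrix and "ierr" is declared by the caller). Note that
   the returned matrix is a borrowed reference to the local block, so the caller
   must not destroy it:

     Mat Aloc;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
*/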
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->m,rstart = mdn->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check each
     index set to confirm that it is OK; currently only a submatrix with the same row
     partitioning as the original matrix is supported (see the sketch following this
     function). */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
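/*
   As noted inside MatGetSubMatrix_MPIDense() above, the row index set must match the
   original row partitioning. A minimal sketch of a conforming call (a hypothetical
   fragment; "A" is an assembled MATMPIDENSE matrix and "ierr" is declared by the
   caller):

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend,N;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/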
#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
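/*
   The two routines above implement the stash-based communication that makes the usual
   set-values/assembly pattern work even when a process sets entries it does not own.
   A typical calling sequence (a hypothetical fragment; "A" is a MATMPIDENSE matrix and
   i, j, v hold a locally computed entry, where row i may be owned by another process):

     ierr = MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/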
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
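/*
   Usage sketch for MatZeroRows() on this format (a hypothetical fragment; "A" is an
   assembled square MATMPIDENSE matrix and "ierr" is declared by the caller). Each
   process may pass any global row numbers; the routine above ships them to their
   owners before zeroing:

     PetscInt    rows[2] = {0,5};
     PetscScalar diag    = 1.0;
     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/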
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->m,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
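/*
   The roworiented flag toggled above changes how MatSetValues() interprets the values
   array. A sketch of inserting a column-major 2x2 block (a hypothetical fragment; "A"
   is a MATMPIDENSE matrix, the values array is stored by column, and "ierr" is
   declared by the caller):

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {1.0,3.0,2.0,4.0};

     ierr = MatSetOption(A,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
     ierr = MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
*/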
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); /* rwork holds m row indices */
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */
  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense
M*/
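/*
   A sketch of selecting this type at runtime (a hypothetical fragment; "A", "M", "N",
   and "ierr" are declared by the caller):

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   then run the program with -mat_type mpidense. To fix the type in the code instead,
   replace MatSetFromOptions() with MatSetType(A,MATMPIDENSE) followed by
   MatMPIDenseSetPreallocation().
*/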
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
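/*
   Usage sketch for the routine above (a hypothetical fragment; most users pass
   PETSC_NULL so PETSc allocates the local column-major storage itself, and "A", "m",
   "n", "M", "N", and "ierr" are declared by the caller):

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/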
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
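/*
   Usage sketch for the routine above (a hypothetical fragment that creates a
   100 x 100 dense matrix, letting PETSc choose the row distribution and allocate the
   storage; "ierr" is declared by the caller):

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
*/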
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
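/*
   The array[i + j*m] indexing in the routine above is the usual column-major
   (Fortran) layout used by the dense formats: in a local block with m rows, entry
   (i,j) lives at offset i + j*m. For example, with m = 3 the entry in row 2,
   column 1 is stored at offset 2 + 1*3 = 5.
*/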
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}