#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the
   sequential matrix that stores the local portion of the operator. For a
   sequential matrix it returns the matrix itself.

   Not collective

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) { /* not MATMPIDENSE; a MATDENSE matrix on more than one process also wraps a sequential matrix */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
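/*
   Illustrative sketch (not part of the library, left disabled): typical use of
   MatDenseGetLocalMatrix() to operate directly on the locally stored block.
   The helper name ExampleScaleLocalBlock is hypothetical.
*/
#if 0
static PetscErrorCode ExampleScaleLocalBlock(Mat A,PetscScalar alpha)
{
  Mat            Aloc;
  PetscScalar    *array;
  PetscInt       i,m,n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);  /* the sequential local block */
  ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);
  ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);         /* column-major storage */
  for (i=0; i<m*n; i++) array[i] *= alpha;
  ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif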
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->m,rstart = mdn->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse == MAT_REUSE_MATRIX) {ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process entry: cache it in the stash for communication at assembly time */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
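/*
   Illustrative sketch (not part of the library, left disabled): entries whose
   rows are owned by another process are legal in MatSetValues() and are routed
   through the stash by the code above; they only reach their owner during
   assembly. The helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleSetOffProcessEntry(Mat A)
{
  PetscInt       row = 0,col = 0;   /* may be owned by any process */
  PetscScalar    value = 1.0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* sends stashed entries */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* receives and inserts them */
  PetscFunctionReturn(0);
}
#endif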
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly rather than going through the
   MatZeroRows() routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
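/*
   Illustrative sketch (not part of the library, left disabled): rows passed to
   MatZeroRows() are global indices and need not be locally owned; the code
   above routes each index to its owning process before zeroing. The helper
   name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleZeroRows(Mat A)
{
  PetscInt       rows[2] = {0,1};   /* global row numbers, not necessarily local */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr); /* zero the rows, put 1.0 on the diagonal */
  PetscFunctionReturn(0);
}
#endif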
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
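/*
   Note on the parallel multiply: each process stores a full m x N strip of the
   matrix, so MatMult gathers the entire input vector into mdn->lvec with the
   Mvctx scatter and then performs a single local dense multiply. The transpose
   multiply runs the same scatter in reverse, accumulating partial results
   with ADD_VALUES.
*/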
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->m,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
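/*
   Note: the local block is stored column major, so the global diagonal entry
   (rstart+i,rstart+i) of the local m x N strip lives at
   aloc->v[(rstart+i)*m + i] = aloc->v[radd + i*m + i], which is the indexing
   used above.
*/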
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N; /* each process stores all N columns of its rows */
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
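/*
   Illustrative sketch (not part of the library, left disabled): setting
   MAT_COLUMN_ORIENTED changes how MatSetValues() interprets the values array,
   as handled by the roworiented flag above. The helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleColumnOrientedInsert(Mat A)
{
  PetscInt       rows[2] = {0,1},cols[2] = {0,1};
  PetscScalar    v[4] = {1.0,2.0,3.0,4.0};  /* read column by column below */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
  ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES);CHKERRQ(ierr); /* v[0],v[1] form column 0 */
  PetscFunctionReturn(0);
}
#endif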
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
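/*
   Note: NORM_1 (max column sum) is computed by summing each local column,
   reducing the per-column sums across processes with MPI_SUM, and taking the
   maximum; NORM_INFINITY (max row sum) needs only an MPI_MAX reduction since
   every row is stored entirely on one process.
*/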
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); /* global row indices of the local rows */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
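/*
   Note: the local block is one contiguous column-major array of m*N entries,
   which is why a single BLAS scal call above scales the entire local part of
   the matrix.
*/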
static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense
M*/
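/*
   Illustrative sketch (not part of the library, left disabled): creating a
   MATMPIDENSE matrix explicitly rather than through MatCreateMPIDense() or
   the options database. The helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleCreateMPIDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,PETSC_NULL);CHKERRQ(ierr); /* PETSc allocates the storage */
  PetscFunctionReturn(0);
}
#endif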
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
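/*
   Illustrative sketch (not part of the library, left disabled): the common
   calling pattern, letting PETSc choose the row/column distribution and
   manage the storage. The helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleCreateViaMatCreateMPIDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif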
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts owned by the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }
  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
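/*
   Illustrative sketch (not part of the library, left disabled): loading a
   parallel dense matrix from a PETSc binary file. The helper name and
   filename are hypothetical, and the FILE_MODE_READ flag name is an
   assumption that may differ in other PETSc versions.
*/
#if 0
static PetscErrorCode ExampleLoadMPIDense(MPI_Comm comm,const char filename[],Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,filename,FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIDENSE,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif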