1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatGetRow_MPIDense" 11 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 12 { 13 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 14 PetscErrorCode ierr; 15 PetscInt lrow,rstart = mat->rstart,rend = mat->rend; 16 17 PetscFunctionBegin; 18 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 19 lrow = row - rstart; 20 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 21 PetscFunctionReturn(0); 22 } 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatRestoreRow_MPIDense" 26 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 27 { 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 32 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 33 PetscFunctionReturn(0); 34 } 35 36 EXTERN_C_BEGIN 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 39 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 40 { 41 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 42 PetscErrorCode ierr; 43 PetscInt m = A->m,rstart = mdn->rstart; 44 PetscScalar *array; 45 MPI_Comm comm; 46 47 PetscFunctionBegin; 48 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 49 50 /* The reuse aspect is not implemented efficiently */ 51 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 52 53 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 54 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 55 ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr); 56 ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr); 57 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 58 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 59 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 60 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 61 62 *iscopy = PETSC_TRUE; 63 PetscFunctionReturn(0); 64 } 65 EXTERN_C_END 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "MatSetValues_MPIDense" 69 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 70 { 71 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 72 PetscErrorCode ierr; 73 PetscInt i,j,rstart = A->rstart,rend = A->rend,row; 74 PetscTruth roworiented = A->roworiented; 75 76 PetscFunctionBegin; 77 for (i=0; i<m; i++) { 78 if (idxm[i] < 0) continue; 79 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 80 if (idxm[i] >= rstart && idxm[i] < rend) { 81 row = idxm[i] - rstart; 82 if (roworiented) { 83 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 84 } else { 85 for (j=0; j<n; j++) { 86 if (idxn[j] < 0) continue; 87 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 88 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 89 } 90 } 91 } else { 92 if (!A->donotstash) { 93 if (roworiented) { 94 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 95 } else { 96 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 97 } 98 } 99 } 100 } 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatGetValues_MPIDense" 106 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 107 { 108 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 109 PetscErrorCode ierr; 110 PetscInt i,j,rstart = mdn->rstart,rend = mdn->rend,row; 111 112 PetscFunctionBegin; 113 for (i=0; i<m; i++) { 114 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 115 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 116 if (idxm[i] >= rstart && idxm[i] < rend) { 117 row = idxm[i] - rstart; 118 for (j=0; j<n; j++) { 119 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 120 if (idxn[j] >= mat->N) { 121 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 122 } 123 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 124 } 125 } else { 126 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 127 } 128 } 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatGetArray_MPIDense" 134 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 135 { 136 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 137 PetscErrorCode ierr; 138 139 PetscFunctionBegin; 140 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 146 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 147 { 148 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 149 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 150 PetscErrorCode ierr; 151 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 152 PetscScalar *av,*bv,*v = lmat->v; 153 Mat newmat; 154 155 PetscFunctionBegin; 156 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 157 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 158 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 159 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 160 161 /* No parallel redistribution currently supported! Should really check each index set 162 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 163 original matrix! */ 164 165 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 166 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 167 168 /* Check submatrix call */ 169 if (scall == MAT_REUSE_MATRIX) { 170 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 171 /* Really need to test rows and column sizes! */ 172 newmat = *B; 173 } else { 174 /* Create and fill new matrix */ 175 ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr); 176 ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr); 177 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 178 } 179 180 /* Now extract the data pointers and do the copy, column at a time */ 181 newmatd = (Mat_MPIDense*)newmat->data; 182 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 183 184 for (i=0; i<ncols; i++) { 185 av = v + nlrows*icol[i]; 186 for (j=0; j<nrows; j++) { 187 *bv++ = av[irow[j] - rstart]; 188 } 189 } 190 191 /* Assemble the matrices so that the correct flags are set */ 192 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 193 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 194 195 /* Free work space */ 196 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 197 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 198 *B = newmat; 199 PetscFunctionReturn(0); 200 } 201 202 #undef __FUNCT__ 203 #define __FUNCT__ "MatRestoreArray_MPIDense" 204 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 205 { 206 PetscFunctionBegin; 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 212 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 213 { 214 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 215 MPI_Comm comm = mat->comm; 216 PetscErrorCode ierr; 217 PetscInt nstash,reallocs; 218 InsertMode addv; 219 220 PetscFunctionBegin; 221 /* make sure all processors are either in INSERTMODE or ADDMODE */ 222 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 223 if (addv == (ADD_VALUES|INSERT_VALUES)) { 224 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 225 } 226 mat->insertmode = addv; /* in case this processor had no cache */ 227 228 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 229 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 230 ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 236 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 237 { 238 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 239 PetscErrorCode ierr; 240 PetscInt i,*row,*col,flg,j,rstart,ncols; 241 PetscMPIInt n; 242 PetscScalar *val; 243 InsertMode addv=mat->insertmode; 244 245 PetscFunctionBegin; 246 /* wait on receives */ 247 while (1) { 248 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 249 if (!flg) break; 250 251 for (i=0; i<n;) { 252 /* Now identify the consecutive vals belonging to the same row */ 253 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 254 if (j < n) ncols = j-i; 255 else ncols = n-i; 256 /* Now assemble all these values with a single function call */ 257 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 258 i = j; 259 } 260 } 261 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 262 263 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 264 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 265 266 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 267 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatZeroEntries_MPIDense" 274 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 275 { 276 PetscErrorCode ierr; 277 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 278 279 PetscFunctionBegin; 280 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 /* the code does not do the diagonal entries correctly unless the 285 matrix is square and the column and row owerships are identical. 286 This is a BUG. The only way to fix it seems to be to access 287 mdn->A and mdn->B directly and not through the MatZeroRows() 288 routine. 289 */ 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatZeroRows_MPIDense" 292 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag) 293 { 294 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 295 PetscErrorCode ierr; 296 PetscInt i,N,*rows,*owners = l->rowners; 297 PetscInt *nprocs,j,idx,nsends; 298 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 299 PetscInt *rvalues,tag = A->tag,count,base,slen,*source; 300 PetscInt *lens,*lrows,*values; 301 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 302 MPI_Comm comm = A->comm; 303 MPI_Request *send_waits,*recv_waits; 304 MPI_Status recv_status,*send_status; 305 IS istmp; 306 PetscTruth found; 307 308 PetscFunctionBegin; 309 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 310 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 311 312 /* first count number of contributors to each processor */ 313 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 314 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 315 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 316 for (i=0; i<N; i++) { 317 idx = rows[i]; 318 found = PETSC_FALSE; 319 for (j=0; j<size; j++) { 320 if (idx >= owners[j] && idx < owners[j+1]) { 321 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 322 } 323 } 324 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 325 } 326 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 327 328 /* inform other processors of number of messages and max length*/ 329 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 330 331 /* post receives: */ 332 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 333 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 334 for (i=0; i<nrecvs; i++) { 335 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 336 } 337 338 /* do sends: 339 1) starts[i] gives the starting index in svalues for stuff going to 340 the ith processor 341 */ 342 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 343 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 344 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 345 starts[0] = 0; 346 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 347 for (i=0; i<N; i++) { 348 svalues[starts[owner[i]]++] = rows[i]; 349 } 350 ISRestoreIndices(is,&rows); 351 352 starts[0] = 0; 353 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 354 count = 0; 355 for (i=0; i<size; i++) { 356 if (nprocs[2*i+1]) { 357 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 358 } 359 } 360 ierr = PetscFree(starts);CHKERRQ(ierr); 361 362 base = owners[rank]; 363 364 /* wait on receives */ 365 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 366 source = lens + nrecvs; 367 count = nrecvs; slen = 0; 368 while (count) { 369 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 370 /* unpack receives into our local space */ 371 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 372 source[imdex] = recv_status.MPI_SOURCE; 373 lens[imdex] = n; 374 slen += n; 375 count--; 376 } 377 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 378 379 /* move the data into the send scatter */ 380 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 381 count = 0; 382 for (i=0; i<nrecvs; i++) { 383 values = rvalues + i*nmax; 384 for (j=0; j<lens[i]; j++) { 385 lrows[count++] = values[j] - base; 386 } 387 } 388 ierr = PetscFree(rvalues);CHKERRQ(ierr); 389 ierr = PetscFree(lens);CHKERRQ(ierr); 390 ierr = PetscFree(owner);CHKERRQ(ierr); 391 ierr = PetscFree(nprocs);CHKERRQ(ierr); 392 393 /* actually zap the local rows */ 394 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 395 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 396 ierr = PetscFree(lrows);CHKERRQ(ierr); 397 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 398 ierr = ISDestroy(istmp);CHKERRQ(ierr); 399 400 /* wait on sends */ 401 if (nsends) { 402 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 403 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 404 ierr = PetscFree(send_status);CHKERRQ(ierr); 405 } 406 ierr = PetscFree(send_waits);CHKERRQ(ierr); 407 ierr = PetscFree(svalues);CHKERRQ(ierr); 408 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatMult_MPIDense" 414 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 415 { 416 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 417 PetscErrorCode ierr; 418 419 PetscFunctionBegin; 420 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 421 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 422 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatMultAdd_MPIDense" 428 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 429 { 430 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 431 PetscErrorCode ierr; 432 433 PetscFunctionBegin; 434 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 435 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 436 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatMultTranspose_MPIDense" 442 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 443 { 444 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 445 PetscErrorCode ierr; 446 PetscScalar zero = 0.0; 447 448 PetscFunctionBegin; 449 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 450 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 451 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 452 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 458 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 459 { 460 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 465 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 466 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 467 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatGetDiagonal_MPIDense" 473 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 474 { 475 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 476 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 477 PetscErrorCode ierr; 478 PetscInt len,i,n,m = A->m,radd; 479 PetscScalar *x,zero = 0.0; 480 481 PetscFunctionBegin; 482 ierr = VecSet(&zero,v);CHKERRQ(ierr); 483 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 484 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 485 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 486 len = PetscMin(a->A->m,a->A->n); 487 radd = a->rstart*m; 488 for (i=0; i<len; i++) { 489 x[i] = aloc->v[radd + i*m + i]; 490 } 491 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatDestroy_MPIDense" 497 PetscErrorCode MatDestroy_MPIDense(Mat mat) 498 { 499 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 500 PetscErrorCode ierr; 501 502 PetscFunctionBegin; 503 504 #if defined(PETSC_USE_LOG) 505 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 506 #endif 507 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 508 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 509 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 510 if (mdn->lvec) VecDestroy(mdn->lvec); 511 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 512 if (mdn->factor) { 513 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 514 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 515 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 516 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 517 } 518 ierr = PetscFree(mdn);CHKERRQ(ierr); 519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 520 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatView_MPIDense_Binary" 526 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 527 { 528 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 529 PetscErrorCode ierr; 530 531 PetscFunctionBegin; 532 if (mdn->size == 1) { 533 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 534 } 535 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 541 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 542 { 543 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 544 PetscErrorCode ierr; 545 PetscMPIInt size = mdn->size,rank = mdn->rank; 546 PetscViewerType vtype; 547 PetscTruth iascii,isdraw; 548 PetscViewer sviewer; 549 PetscViewerFormat format; 550 551 PetscFunctionBegin; 552 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 553 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 554 if (iascii) { 555 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 556 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 557 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 558 MatInfo info; 559 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 560 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m, 561 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 562 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 563 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } else if (format == PETSC_VIEWER_ASCII_INFO) { 566 PetscFunctionReturn(0); 567 } 568 } else if (isdraw) { 569 PetscDraw draw; 570 PetscTruth isnull; 571 572 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 573 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 574 if (isnull) PetscFunctionReturn(0); 575 } 576 577 if (size == 1) { 578 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 579 } else { 580 /* assemble the entire matrix onto first processor. */ 581 Mat A; 582 PetscInt M = mat->M,N = mat->N,m,row,i,nz; 583 PetscInt *cols; 584 PetscScalar *vals; 585 586 if (!rank) { 587 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 588 } else { 589 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 590 } 591 /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */ 592 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 593 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 594 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 595 596 /* Copy the matrix ... This isn't the most efficient means, 597 but it's quick for now */ 598 A->insertmode = INSERT_VALUES; 599 row = mdn->rstart; m = mdn->A->m; 600 for (i=0; i<m; i++) { 601 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 602 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 603 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 604 row++; 605 } 606 607 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 608 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 609 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 610 if (!rank) { 611 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 612 } 613 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 614 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 615 ierr = MatDestroy(A);CHKERRQ(ierr); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatView_MPIDense" 622 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 623 { 624 PetscErrorCode ierr; 625 PetscTruth iascii,isbinary,isdraw,issocket; 626 627 PetscFunctionBegin; 628 629 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 630 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 631 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 632 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 633 634 if (iascii || issocket || isdraw) { 635 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 636 } else if (isbinary) { 637 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 638 } else { 639 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 640 } 641 PetscFunctionReturn(0); 642 } 643 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatGetInfo_MPIDense" 646 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 647 { 648 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 649 Mat mdn = mat->A; 650 PetscErrorCode ierr; 651 PetscReal isend[5],irecv[5]; 652 653 PetscFunctionBegin; 654 info->rows_global = (double)A->M; 655 info->columns_global = (double)A->N; 656 info->rows_local = (double)A->m; 657 info->columns_local = (double)A->N; 658 info->block_size = 1.0; 659 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 660 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 661 isend[3] = info->memory; isend[4] = info->mallocs; 662 if (flag == MAT_LOCAL) { 663 info->nz_used = isend[0]; 664 info->nz_allocated = isend[1]; 665 info->nz_unneeded = isend[2]; 666 info->memory = isend[3]; 667 info->mallocs = isend[4]; 668 } else if (flag == MAT_GLOBAL_MAX) { 669 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 670 info->nz_used = irecv[0]; 671 info->nz_allocated = irecv[1]; 672 info->nz_unneeded = irecv[2]; 673 info->memory = irecv[3]; 674 info->mallocs = irecv[4]; 675 } else if (flag == MAT_GLOBAL_SUM) { 676 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 677 info->nz_used = irecv[0]; 678 info->nz_allocated = irecv[1]; 679 info->nz_unneeded = irecv[2]; 680 info->memory = irecv[3]; 681 info->mallocs = irecv[4]; 682 } 683 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 684 info->fill_ratio_needed = 0; 685 info->factor_mallocs = 0; 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatSetOption_MPIDense" 691 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op) 692 { 693 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 694 PetscErrorCode ierr; 695 696 PetscFunctionBegin; 697 switch (op) { 698 case MAT_NO_NEW_NONZERO_LOCATIONS: 699 case MAT_YES_NEW_NONZERO_LOCATIONS: 700 case MAT_NEW_NONZERO_LOCATION_ERR: 701 case MAT_NEW_NONZERO_ALLOCATION_ERR: 702 case MAT_COLUMNS_SORTED: 703 case MAT_COLUMNS_UNSORTED: 704 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 705 break; 706 case MAT_ROW_ORIENTED: 707 a->roworiented = PETSC_TRUE; 708 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 709 break; 710 case MAT_ROWS_SORTED: 711 case MAT_ROWS_UNSORTED: 712 case MAT_YES_NEW_DIAGONALS: 713 case MAT_USE_HASH_TABLE: 714 ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr); 715 break; 716 case MAT_COLUMN_ORIENTED: 717 a->roworiented = PETSC_FALSE; 718 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 719 break; 720 case MAT_IGNORE_OFF_PROC_ENTRIES: 721 a->donotstash = PETSC_TRUE; 722 break; 723 case MAT_NO_NEW_DIAGONALS: 724 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 725 case MAT_SYMMETRIC: 726 case MAT_STRUCTURALLY_SYMMETRIC: 727 case MAT_NOT_SYMMETRIC: 728 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 729 case MAT_HERMITIAN: 730 case MAT_NOT_HERMITIAN: 731 case MAT_SYMMETRY_ETERNAL: 732 case MAT_NOT_SYMMETRY_ETERNAL: 733 break; 734 default: 735 SETERRQ(PETSC_ERR_SUP,"unknown option"); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatDiagonalScale_MPIDense" 743 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 744 { 745 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 746 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 747 PetscScalar *l,*r,x,*v; 748 PetscErrorCode ierr; 749 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 750 751 PetscFunctionBegin; 752 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 753 if (ll) { 754 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 755 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size"); 756 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 757 for (i=0; i<m; i++) { 758 x = l[i]; 759 v = mat->v + i; 760 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 761 } 762 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 763 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 764 } 765 if (rr) { 766 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 767 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size"); 768 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 769 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 770 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 771 for (i=0; i<n; i++) { 772 x = r[i]; 773 v = mat->v + i*m; 774 for (j=0; j<m; j++) { (*v++) *= x;} 775 } 776 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 777 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatNorm_MPIDense" 784 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 785 { 786 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 787 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 788 PetscErrorCode ierr; 789 PetscInt i,j; 790 PetscReal sum = 0.0; 791 PetscScalar *v = mat->v; 792 793 PetscFunctionBegin; 794 if (mdn->size == 1) { 795 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 796 } else { 797 if (type == NORM_FROBENIUS) { 798 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 799 #if defined(PETSC_USE_COMPLEX) 800 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 801 #else 802 sum += (*v)*(*v); v++; 803 #endif 804 } 805 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 806 *nrm = sqrt(*nrm); 807 ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr); 808 } else if (type == NORM_1) { 809 PetscReal *tmp,*tmp2; 810 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 811 tmp2 = tmp + A->N; 812 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 813 *nrm = 0.0; 814 v = mat->v; 815 for (j=0; j<mdn->A->n; j++) { 816 for (i=0; i<mdn->A->m; i++) { 817 tmp[j] += PetscAbsScalar(*v); v++; 818 } 819 } 820 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 821 for (j=0; j<A->N; j++) { 822 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 823 } 824 ierr = PetscFree(tmp);CHKERRQ(ierr); 825 ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr); 826 } else if (type == NORM_INFINITY) { /* max row norm */ 827 PetscReal ntemp; 828 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 829 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 830 } else { 831 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 832 } 833 } 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatTranspose_MPIDense" 839 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout) 840 { 841 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 842 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 843 Mat B; 844 PetscInt M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 845 PetscErrorCode ierr; 846 PetscInt j,i; 847 PetscScalar *v; 848 849 PetscFunctionBegin; 850 if (!matout && M != N) { 851 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 852 } 853 ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr); 854 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 855 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 856 857 m = a->A->m; n = a->A->n; v = Aloc->v; 858 ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 859 for (j=0; j<n; j++) { 860 for (i=0; i<m; i++) rwork[i] = rstart + i; 861 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 862 v += m; 863 } 864 ierr = PetscFree(rwork);CHKERRQ(ierr); 865 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 867 if (matout) { 868 *matout = B; 869 } else { 870 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #include "petscblaslapack.h" 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatScale_MPIDense" 878 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA) 879 { 880 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 881 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 882 PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N; 883 PetscErrorCode ierr; 884 885 PetscFunctionBegin; 886 BLASscal_(&nz,(PetscScalar*)alpha,a->v,&one); 887 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 895 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 896 { 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 /* -------------------------------------------------------------------*/ 905 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 906 MatGetRow_MPIDense, 907 MatRestoreRow_MPIDense, 908 MatMult_MPIDense, 909 /* 4*/ MatMultAdd_MPIDense, 910 MatMultTranspose_MPIDense, 911 MatMultTransposeAdd_MPIDense, 912 0, 913 0, 914 0, 915 /*10*/ 0, 916 0, 917 0, 918 0, 919 MatTranspose_MPIDense, 920 /*15*/ MatGetInfo_MPIDense, 921 0, 922 MatGetDiagonal_MPIDense, 923 MatDiagonalScale_MPIDense, 924 MatNorm_MPIDense, 925 /*20*/ MatAssemblyBegin_MPIDense, 926 MatAssemblyEnd_MPIDense, 927 0, 928 MatSetOption_MPIDense, 929 MatZeroEntries_MPIDense, 930 /*25*/ MatZeroRows_MPIDense, 931 0, 932 0, 933 0, 934 0, 935 /*30*/ MatSetUpPreallocation_MPIDense, 936 0, 937 0, 938 MatGetArray_MPIDense, 939 MatRestoreArray_MPIDense, 940 /*35*/ MatDuplicate_MPIDense, 941 0, 942 0, 943 0, 944 0, 945 /*40*/ 0, 946 MatGetSubMatrices_MPIDense, 947 0, 948 MatGetValues_MPIDense, 949 0, 950 /*45*/ 0, 951 MatScale_MPIDense, 952 0, 953 0, 954 0, 955 /*50*/ 0, 956 0, 957 0, 958 0, 959 0, 960 /*55*/ 0, 961 0, 962 0, 963 0, 964 0, 965 /*60*/ MatGetSubMatrix_MPIDense, 966 MatDestroy_MPIDense, 967 MatView_MPIDense, 968 MatGetPetscMaps_Petsc, 969 0, 970 /*65*/ 0, 971 0, 972 0, 973 0, 974 0, 975 /*70*/ 0, 976 0, 977 0, 978 0, 979 0, 980 /*75*/ 0, 981 0, 982 0, 983 0, 984 0, 985 /*80*/ 0, 986 0, 987 0, 988 0, 989 /*84*/ MatLoad_MPIDense, 990 0, 991 0, 992 0, 993 0, 994 0, 995 /*90*/ 0, 996 0, 997 0, 998 0, 999 0, 1000 /*95*/ 0, 1001 0, 1002 0, 1003 0}; 1004 1005 EXTERN_C_BEGIN 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1008 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1009 { 1010 Mat_MPIDense *a; 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 mat->preallocated = PETSC_TRUE; 1015 /* Note: For now, when data is specified above, this assumes the user correctly 1016 allocates the local dense storage space. We should add error checking. */ 1017 1018 a = (Mat_MPIDense*)mat->data; 1019 ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr); 1020 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1021 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1022 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1023 PetscFunctionReturn(0); 1024 } 1025 EXTERN_C_END 1026 1027 /*MC 1028 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1029 1030 Options Database Keys: 1031 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1032 1033 Level: beginner 1034 1035 .seealso: MatCreateMPIDense 1036 M*/ 1037 1038 EXTERN_C_BEGIN 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatCreate_MPIDense" 1041 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1042 { 1043 Mat_MPIDense *a; 1044 PetscErrorCode ierr; 1045 PetscInt i; 1046 1047 PetscFunctionBegin; 1048 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1049 mat->data = (void*)a; 1050 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1051 mat->factor = 0; 1052 mat->mapping = 0; 1053 1054 a->factor = 0; 1055 mat->insertmode = NOT_SET_VALUES; 1056 ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr); 1057 ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr); 1058 1059 ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr); 1060 ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr); 1061 a->nvec = mat->n; 1062 1063 /* the information in the maps duplicates the information computed below, eventually 1064 we should remove the duplicate information that is not contained in the maps */ 1065 ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr); 1066 ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr); 1067 1068 /* build local table of row and column ownerships */ 1069 ierr = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr); 1070 a->cowners = a->rowners + a->size + 1; 1071 ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr); 1072 ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr); 1073 a->rowners[0] = 0; 1074 for (i=2; i<=a->size; i++) { 1075 a->rowners[i] += a->rowners[i-1]; 1076 } 1077 a->rstart = a->rowners[a->rank]; 1078 a->rend = a->rowners[a->rank+1]; 1079 ierr = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr); 1080 a->cowners[0] = 0; 1081 for (i=2; i<=a->size; i++) { 1082 a->cowners[i] += a->cowners[i-1]; 1083 } 1084 1085 /* build cache for off array entries formed */ 1086 a->donotstash = PETSC_FALSE; 1087 ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr); 1088 1089 /* stuff used for matrix vector multiply */ 1090 a->lvec = 0; 1091 a->Mvctx = 0; 1092 a->roworiented = PETSC_TRUE; 1093 1094 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1095 "MatGetDiagonalBlock_MPIDense", 1096 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1097 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1098 "MatMPIDenseSetPreallocation_MPIDense", 1099 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 EXTERN_C_END 1103 1104 /*MC 1105 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1106 1107 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1108 and MATMPIDENSE otherwise. 1109 1110 Options Database Keys: 1111 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1112 1113 Level: beginner 1114 1115 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1116 M*/ 1117 1118 EXTERN_C_BEGIN 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatCreate_Dense" 1121 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1122 { 1123 PetscErrorCode ierr; 1124 PetscMPIInt size; 1125 1126 PetscFunctionBegin; 1127 ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr); 1128 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1129 if (size == 1) { 1130 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1131 } else { 1132 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1133 } 1134 PetscFunctionReturn(0); 1135 } 1136 EXTERN_C_END 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1140 /*@C 1141 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1142 1143 Not collective 1144 1145 Input Parameters: 1146 . A - the matrix 1147 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1148 to control all matrix memory allocation. 1149 1150 Notes: 1151 The dense format is fully compatible with standard Fortran 77 1152 storage by columns. 1153 1154 The data input variable is intended primarily for Fortran programmers 1155 who wish to allocate their own matrix memory space. Most users should 1156 set data=PETSC_NULL. 1157 1158 Level: intermediate 1159 1160 .keywords: matrix,dense, parallel 1161 1162 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1163 @*/ 1164 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1165 { 1166 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1167 1168 PetscFunctionBegin; 1169 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1170 if (f) { 1171 ierr = (*f)(mat,data);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatCreateMPIDense" 1178 /*@C 1179 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1180 1181 Collective on MPI_Comm 1182 1183 Input Parameters: 1184 + comm - MPI communicator 1185 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1186 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1187 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1188 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1189 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1190 to control all matrix memory allocation. 1191 1192 Output Parameter: 1193 . A - the matrix 1194 1195 Notes: 1196 The dense format is fully compatible with standard Fortran 77 1197 storage by columns. 1198 1199 The data input variable is intended primarily for Fortran programmers 1200 who wish to allocate their own matrix memory space. Most users should 1201 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1202 1203 The user MUST specify either the local or global matrix dimensions 1204 (possibly both). 1205 1206 Level: intermediate 1207 1208 .keywords: matrix,dense, parallel 1209 1210 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1211 @*/ 1212 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1213 { 1214 PetscErrorCode ierr; 1215 PetscMPIInt size; 1216 1217 PetscFunctionBegin; 1218 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1219 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1220 if (size > 1) { 1221 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1222 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1223 } else { 1224 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1225 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatDuplicate_MPIDense" 1232 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1233 { 1234 Mat mat; 1235 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 *newmat = 0; 1240 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr); 1241 ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr); 1242 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1243 mat->data = (void*)a; 1244 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1245 mat->factor = A->factor; 1246 mat->assembled = PETSC_TRUE; 1247 mat->preallocated = PETSC_TRUE; 1248 1249 a->rstart = oldmat->rstart; 1250 a->rend = oldmat->rend; 1251 a->size = oldmat->size; 1252 a->rank = oldmat->rank; 1253 mat->insertmode = NOT_SET_VALUES; 1254 a->nvec = oldmat->nvec; 1255 a->donotstash = oldmat->donotstash; 1256 ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr); 1257 ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr); 1258 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1259 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1260 1261 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1262 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1263 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1264 *newmat = mat; 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #include "petscsys.h" 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1272 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat) 1273 { 1274 PetscErrorCode ierr; 1275 PetscMPIInt rank,size; 1276 PetscInt *rowners,i,m,nz,j; 1277 PetscScalar *array,*vals,*vals_ptr; 1278 MPI_Status status; 1279 1280 PetscFunctionBegin; 1281 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1282 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1283 1284 /* determine ownership of all rows */ 1285 m = M/size + ((M % size) > rank); 1286 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1287 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1288 rowners[0] = 0; 1289 for (i=2; i<=size; i++) { 1290 rowners[i] += rowners[i-1]; 1291 } 1292 1293 ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr); 1294 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1295 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1296 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1297 1298 if (!rank) { 1299 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1300 1301 /* read in my part of the matrix numerical values */ 1302 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1303 1304 /* insert into matrix-by row (this is why cannot directly read into array */ 1305 vals_ptr = vals; 1306 for (i=0; i<m; i++) { 1307 for (j=0; j<N; j++) { 1308 array[i + j*m] = *vals_ptr++; 1309 } 1310 } 1311 1312 /* read in other processors and ship out */ 1313 for (i=1; i<size; i++) { 1314 nz = (rowners[i+1] - rowners[i])*N; 1315 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1316 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1317 } 1318 } else { 1319 /* receive numeric values */ 1320 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1321 1322 /* receive message of values*/ 1323 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1324 1325 /* insert into matrix-by row (this is why cannot directly read into array */ 1326 vals_ptr = vals; 1327 for (i=0; i<m; i++) { 1328 for (j=0; j<N; j++) { 1329 array[i + j*m] = *vals_ptr++; 1330 } 1331 } 1332 } 1333 ierr = PetscFree(rowners);CHKERRQ(ierr); 1334 ierr = PetscFree(vals);CHKERRQ(ierr); 1335 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1336 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatLoad_MPIDense" 1342 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1343 { 1344 Mat A; 1345 PetscScalar *vals,*svals; 1346 MPI_Comm comm = ((PetscObject)viewer)->comm; 1347 MPI_Status status; 1348 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1349 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1350 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1351 PetscInt i,nz,j,rstart,rend; 1352 int fd; 1353 PetscErrorCode ierr; 1354 1355 PetscFunctionBegin; 1356 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1357 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1358 if (!rank) { 1359 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1360 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1361 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1362 } 1363 1364 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1365 M = header[1]; N = header[2]; nz = header[3]; 1366 1367 /* 1368 Handle case where matrix is stored on disk as a dense matrix 1369 */ 1370 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1371 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 /* determine ownership of all rows */ 1376 m = M/size + ((M % size) > rank); 1377 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1378 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1379 rowners[0] = 0; 1380 for (i=2; i<=size; i++) { 1381 rowners[i] += rowners[i-1]; 1382 } 1383 rstart = rowners[rank]; 1384 rend = rowners[rank+1]; 1385 1386 /* distribute row lengths to all processors */ 1387 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1388 offlens = ourlens + (rend-rstart); 1389 if (!rank) { 1390 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1391 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1392 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1393 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1394 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1395 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1396 } else { 1397 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1398 } 1399 1400 if (!rank) { 1401 /* calculate the number of nonzeros on each processor */ 1402 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1403 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1404 for (i=0; i<size; i++) { 1405 for (j=rowners[i]; j< rowners[i+1]; j++) { 1406 procsnz[i] += rowlengths[j]; 1407 } 1408 } 1409 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1410 1411 /* determine max buffer needed and allocate it */ 1412 maxnz = 0; 1413 for (i=0; i<size; i++) { 1414 maxnz = PetscMax(maxnz,procsnz[i]); 1415 } 1416 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1417 1418 /* read in my part of the matrix column indices */ 1419 nz = procsnz[0]; 1420 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1421 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1422 1423 /* read in every one elses and ship off */ 1424 for (i=1; i<size; i++) { 1425 nz = procsnz[i]; 1426 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1427 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1428 } 1429 ierr = PetscFree(cols);CHKERRQ(ierr); 1430 } else { 1431 /* determine buffer space needed for message */ 1432 nz = 0; 1433 for (i=0; i<m; i++) { 1434 nz += ourlens[i]; 1435 } 1436 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1437 1438 /* receive message of column indices*/ 1439 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1440 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1441 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1442 } 1443 1444 /* loop over local rows, determining number of off diagonal entries */ 1445 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1446 jj = 0; 1447 for (i=0; i<m; i++) { 1448 for (j=0; j<ourlens[i]; j++) { 1449 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1450 jj++; 1451 } 1452 } 1453 1454 /* create our matrix */ 1455 for (i=0; i<m; i++) { 1456 ourlens[i] -= offlens[i]; 1457 } 1458 ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr); 1459 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1460 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1461 A = *newmat; 1462 for (i=0; i<m; i++) { 1463 ourlens[i] += offlens[i]; 1464 } 1465 1466 if (!rank) { 1467 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1468 1469 /* read in my part of the matrix numerical values */ 1470 nz = procsnz[0]; 1471 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1472 1473 /* insert into matrix */ 1474 jj = rstart; 1475 smycols = mycols; 1476 svals = vals; 1477 for (i=0; i<m; i++) { 1478 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1479 smycols += ourlens[i]; 1480 svals += ourlens[i]; 1481 jj++; 1482 } 1483 1484 /* read in other processors and ship out */ 1485 for (i=1; i<size; i++) { 1486 nz = procsnz[i]; 1487 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1488 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1489 } 1490 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1491 } else { 1492 /* receive numeric values */ 1493 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1494 1495 /* receive message of values*/ 1496 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1497 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1498 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1499 1500 /* insert into matrix */ 1501 jj = rstart; 1502 smycols = mycols; 1503 svals = vals; 1504 for (i=0; i<m; i++) { 1505 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1506 smycols += ourlens[i]; 1507 svals += ourlens[i]; 1508 jj++; 1509 } 1510 } 1511 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1512 ierr = PetscFree(vals);CHKERRQ(ierr); 1513 ierr = PetscFree(mycols);CHKERRQ(ierr); 1514 ierr = PetscFree(rowners);CHKERRQ(ierr); 1515 1516 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1517 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 1522 1523 1524 1525