#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->m,rstart = mdn->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE; since
     INSERT_VALUES and ADD_VALUES are distinct bits, a bitwise OR across
     all processes detects a mix of the two modes */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
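/*
   Note on the multiply kernels above (a summary of the code, not new
   behavior): each process stores complete rows (an m x N local block), so
   y = A*x first gathers all of x into the work vector lvec through the
   Mvctx scatter and then calls the sequential kernel; the transpose
   products run the sequential transpose kernel locally and scatter-add the
   partial sums back into the result.  Hedged usage sketch, assuming an
   assembled matrix A and conforming vectors x and y:

     ierr = MatMult(A,x,y);CHKERRQ(ierr);            y = A x
     ierr = MatMultTranspose(A,x,y);CHKERRQ(ierr);   y = A' x
*/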
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->m,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
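/*
   Usage sketch (illustrative only): the supported norms above are
   NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY (max row sum);
   the two norm is not supported in parallel.  For an assembled matrix A:

     PetscReal      nrm;
     PetscErrorCode ierr;

     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/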
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* rwork holds the global row indices of the m local rows; it is the same
     for every column, so fill it once outside the column loop */
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
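/*
   Note on the data argument above (a sketch of the assumed layout, not
   checked by the code): when a non-null pointer is passed, it must point to
   user-allocated storage of at least m*N PetscScalars, interpreted as the
   local rows stored by columns (column-major), since it is handed directly
   to MatSeqDenseSetPreallocation() for the local m x N block.
*/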
/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below;
     eventually we should remove the duplicate information that is not
     contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
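/*
   Creation sketch (illustrative only): the usual way to obtain this type is

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   where M and N are the global dimensions (hypothetical variables here).
*/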
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
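/*
   Usage sketch (illustrative only): letting PETSc allocate the storage,

     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   or supplying user storage for the local m x N block (column-major; the
   array below is a hypothetical user allocation of size m*N):

     PetscScalar *storage;
     ierr = MatMPIDenseSetPreallocation(A,storage);CHKERRQ(ierr);
*/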
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a distributed parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
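/*
   Usage sketch (illustrative only): create a matrix with global size M x N,
   letting PETSc choose the row distribution and allocate the values:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/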
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (the file is stored by rows, the
       array by columns; this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (see the note above on the layout) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
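/*
   The loaders here assume the standard PETSc binary format: a header of
   four PetscInts (MAT_FILE_COOKIE, M, N, nz).  If nz equals
   MATRIX_BINARY_FORMAT_DENSE, the values follow as M*N scalars stored by
   rows and are handled by MatLoad_MPIDense_DenseInFile() above; otherwise
   the file carries AIJ-style row lengths, column indices and values, which
   MatLoad_MPIDense() below distributes and inserts row by row.
*/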
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
    /* read in the values for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}