#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->m,rstart = mdn->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  /* The local block is stored column-major, so the diagonal block begins at column rstart */
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: cache the values in the stash until assembly */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
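/*
   Editor's note: a minimal sketch (not part of this file) of how the two
   orientations handled above affect the layout of the `v' argument to
   MatSetValues().  Assumes `mat' is an already-created MPIDENSE matrix;
   variable names are illustrative only.

     PetscScalar    vals[4] = {1.0,2.0,3.0,4.0};
     PetscInt       rows[2] = {0,1},cols[2] = {0,1};
     PetscErrorCode ierr;

     // row-oriented (the default): vals is read as {a00,a01,a10,a11}
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

     // column-oriented: vals is read as {a00,a10,a01,a11}
     ierr = MatSetOption(mat,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
*/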
"MatGetValues_MPIDense" 107 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 108 { 109 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt i,j,rstart = mdn->rstart,rend = mdn->rend,row; 112 113 PetscFunctionBegin; 114 for (i=0; i<m; i++) { 115 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 116 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 117 if (idxm[i] >= rstart && idxm[i] < rend) { 118 row = idxm[i] - rstart; 119 for (j=0; j<n; j++) { 120 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 121 if (idxn[j] >= mat->N) { 122 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 123 } 124 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 125 } 126 } else { 127 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 128 } 129 } 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatGetArray_MPIDense" 135 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 136 { 137 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 147 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 148 { 149 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 150 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 153 PetscScalar *av,*bv,*v = lmat->v; 154 Mat newmat; 155 156 PetscFunctionBegin; 157 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 158 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 159 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 160 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 161 162 /* No parallel redistribution currently supported! Should really check each index set 163 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 164 original matrix! */ 165 166 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 167 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 168 169 /* Check submatrix call */ 170 if (scall == MAT_REUSE_MATRIX) { 171 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 172 /* Really need to test rows and column sizes! 
     */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
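/*
   Editor's note: a minimal sketch (not part of this file) of the assembly
   protocol the two routines above implement.  Values set for rows owned by
   another process are stashed locally and only communicated inside
   MatAssemblyBegin/End.  `mat' is assumed to be an MPIDENSE matrix of
   suitable size; names are illustrative only.

     PetscInt       row = 0;   // may be owned by another process
     PetscInt       col = 0;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;

     ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
     // nothing has been communicated yet; the value sits in mat->stash
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     // now every process has received and applied its stashed entries
*/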
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* gather the full input vector into lvec, then multiply by the local m x N block */
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->m,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* local column j of A becomes global row j of B; the global row indices
     rstart..rstart+m-1 of A become the column indices, so rwork needs m entries */
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking.
  */

  a = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-processor entries formed during MatSetValues() */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
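/*
   Editor's note: a minimal sketch (not part of this file) of the generic
   creation path that reaches the constructor above.  MatSetType() looks up
   "mpidense" in the type registry and invokes MatCreate_MPIDense(); the
   preallocation is applied afterwards.  Names are illustrative only.

     Mat            mat;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
     ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);   // runs MatCreate_MPIDense()
     ierr = MatMPIDenseSetPreallocation(mat,PETSC_NULL);CHKERRQ(ierr);
*/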
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not Collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
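/*
   Editor's note: a minimal sketch (not part of this file) of supplying your
   own storage through the data argument above.  The local block is m x N
   (all global columns), stored column-major, so the user array must hold
   m*N scalars.  Names are illustrative only.

     Mat            mat;
     PetscScalar    *storage;
     PetscInt       m = 10,N = 40;   // local rows, global columns
     PetscErrorCode ierr;

     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&storage);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
     ierr = MatSetSizes(mat,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(mat,storage);CHKERRQ(ierr);
     // entry (i,j) of the local block lives at storage[i + j*m]
*/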
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
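/*
   Editor's note: a minimal sketch (not part of this file) of the convenience
   routine above.  Only the global size is given; PETSc decides the row
   distribution.  Names are illustrative only.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     // ... MatSetValues(), MatAssemblyBegin/End(), use A ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/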
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
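/*
   Editor's note: a minimal sketch (not part of this file) of driving
   MatLoad_MPIDense() through the public interface.  Assumes "matrix.dat"
   was written earlier with MatView() on a binary viewer; the file name is
   illustrative only.

     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/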