1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) *B = mat->A; 34 else *B = A; 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRow_MPIDense" 40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 41 { 42 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 43 PetscErrorCode ierr; 44 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 45 46 PetscFunctionBegin; 47 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 48 lrow = row - rstart; 49 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatRestoreRow_MPIDense" 55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 56 { 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 61 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 62 PetscFunctionReturn(0); 63 } 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 68 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 69 { 70 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 71 PetscErrorCode ierr; 72 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 73 PetscScalar *array; 74 MPI_Comm comm; 75 Mat B; 76 77 PetscFunctionBegin; 78 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 79 80 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 81 if (!B) { 82 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 83 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 84 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 85 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 86 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 87 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 88 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 89 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 90 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 92 *a = B; 93 ierr = MatDestroy(&B);CHKERRQ(ierr); 94 } else *a = B; 95 PetscFunctionReturn(0); 96 } 97 EXTERN_C_END 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatSetValues_MPIDense" 101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 102 { 103 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 104 PetscErrorCode ierr; 105 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 106 PetscBool roworiented = A->roworiented; 107 108 PetscFunctionBegin; 109 if (v) PetscValidScalarPointer(v,6); 110 for (i=0; i<m; i++) { 111 if (idxm[i] < 0) continue; 112 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 113 if (idxm[i] >= rstart && idxm[i] < rend) { 114 row = idxm[i] - rstart; 115 if (roworiented) { 116 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 117 } else { 118 for (j=0; j<n; j++) { 119 if (idxn[j] < 0) continue; 120 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 121 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 122 } 123 } 124 } else if (!A->donotstash) { 125 mat->assembled = PETSC_FALSE; 126 if (roworiented) { 127 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 128 } else { 129 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 130 } 131 } 132 } 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatGetValues_MPIDense" 138 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 139 { 140 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 141 PetscErrorCode ierr; 142 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 143 144 PetscFunctionBegin; 145 for (i=0; i<m; i++) { 146 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 147 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 148 if (idxm[i] >= rstart && idxm[i] < rend) { 149 row = idxm[i] - rstart; 150 for (j=0; j<n; j++) { 151 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 152 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 153 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 154 } 155 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 156 } 157 PetscFunctionReturn(0); 158 } 159 160 #undef __FUNCT__ 161 #define __FUNCT__ "MatDenseGetArray_MPIDense" 162 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 163 { 164 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 165 PetscErrorCode ierr; 166 167 PetscFunctionBegin; 168 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 174 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 175 { 176 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 177 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 178 PetscErrorCode ierr; 179 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 180 const PetscInt *irow,*icol; 181 PetscScalar *av,*bv,*v = lmat->v; 182 Mat newmat; 183 IS iscol_local; 184 185 PetscFunctionBegin; 186 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 187 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 188 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 189 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 190 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 191 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 192 193 /* No parallel redistribution currently supported! Should really check each index set 194 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 195 original matrix! */ 196 197 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 198 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 199 200 /* Check submatrix call */ 201 if (scall == MAT_REUSE_MATRIX) { 202 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 203 /* Really need to test rows and column sizes! */ 204 newmat = *B; 205 } else { 206 /* Create and fill new matrix */ 207 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 208 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 209 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 210 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 211 } 212 213 /* Now extract the data pointers and do the copy, column at a time */ 214 newmatd = (Mat_MPIDense*)newmat->data; 215 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 216 217 for (i=0; i<Ncols; i++) { 218 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 219 for (j=0; j<nrows; j++) { 220 *bv++ = av[irow[j] - rstart]; 221 } 222 } 223 224 /* Assemble the matrices so that the correct flags are set */ 225 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 226 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 227 228 /* Free work space */ 229 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 230 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 231 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 232 *B = newmat; 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 238 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 239 { 240 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 241 PetscErrorCode ierr; 242 243 PetscFunctionBegin; 244 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 250 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 251 { 252 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 253 MPI_Comm comm = ((PetscObject)mat)->comm; 254 PetscErrorCode ierr; 255 PetscInt nstash,reallocs; 256 InsertMode addv; 257 258 PetscFunctionBegin; 259 /* make sure all processors are either in INSERTMODE or ADDMODE */ 260 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 261 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 mat->insertmode = addv; /* in case this processor had no cache */ 263 264 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 265 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 266 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 273 { 274 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 275 PetscErrorCode ierr; 276 PetscInt i,*row,*col,flg,j,rstart,ncols; 277 PetscMPIInt n; 278 PetscScalar *val; 279 InsertMode addv=mat->insertmode; 280 281 PetscFunctionBegin; 282 /* wait on receives */ 283 while (1) { 284 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 285 if (!flg) break; 286 287 for (i=0; i<n;) { 288 /* Now identify the consecutive vals belonging to the same row */ 289 for (j=i,rstart=row[j]; j<n; j++) { 290 if (row[j] != rstart) break; 291 } 292 if (j < n) ncols = j-i; 293 else ncols = n-i; 294 /* Now assemble all these values with a single function call */ 295 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 296 i = j; 297 } 298 } 299 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 300 301 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 302 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 303 304 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 305 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 306 } 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatZeroEntries_MPIDense" 312 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 313 { 314 PetscErrorCode ierr; 315 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 316 317 PetscFunctionBegin; 318 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 /* the code does not do the diagonal entries correctly unless the 323 matrix is square and the column and row owerships are identical. 324 This is a BUG. The only way to fix it seems to be to access 325 mdn->A and mdn->B directly and not through the MatZeroRows() 326 routine. 327 */ 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatZeroRows_MPIDense" 330 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 331 { 332 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 333 PetscErrorCode ierr; 334 PetscInt i,*owners = A->rmap->range; 335 PetscInt *nprocs,j,idx,nsends; 336 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 337 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 338 PetscInt *lens,*lrows,*values; 339 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 340 MPI_Comm comm = ((PetscObject)A)->comm; 341 MPI_Request *send_waits,*recv_waits; 342 MPI_Status recv_status,*send_status; 343 PetscBool found; 344 const PetscScalar *xx; 345 PetscScalar *bb; 346 347 PetscFunctionBegin; 348 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices"); 349 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 350 /* first count number of contributors to each processor */ 351 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 352 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 353 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 354 for (i=0; i<N; i++) { 355 idx = rows[i]; 356 found = PETSC_FALSE; 357 for (j=0; j<size; j++) { 358 if (idx >= owners[j] && idx < owners[j+1]) { 359 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 360 } 361 } 362 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 363 } 364 nsends = 0; 365 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 366 367 /* inform other processors of number of messages and max length*/ 368 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 369 370 /* post receives: */ 371 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 372 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 373 for (i=0; i<nrecvs; i++) { 374 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 375 } 376 377 /* do sends: 378 1) starts[i] gives the starting index in svalues for stuff going to 379 the ith processor 380 */ 381 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 382 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 383 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 384 385 starts[0] = 0; 386 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 387 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 388 389 starts[0] = 0; 390 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 391 count = 0; 392 for (i=0; i<size; i++) { 393 if (nprocs[2*i+1]) { 394 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 395 } 396 } 397 ierr = PetscFree(starts);CHKERRQ(ierr); 398 399 base = owners[rank]; 400 401 /* wait on receives */ 402 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 403 count = nrecvs; 404 slen = 0; 405 while (count) { 406 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 407 /* unpack receives into our local space */ 408 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 409 410 source[imdex] = recv_status.MPI_SOURCE; 411 lens[imdex] = n; 412 slen += n; 413 count--; 414 } 415 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 416 417 /* move the data into the send scatter */ 418 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 419 count = 0; 420 for (i=0; i<nrecvs; i++) { 421 values = rvalues + i*nmax; 422 for (j=0; j<lens[i]; j++) { 423 lrows[count++] = values[j] - base; 424 } 425 } 426 ierr = PetscFree(rvalues);CHKERRQ(ierr); 427 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 428 ierr = PetscFree(owner);CHKERRQ(ierr); 429 ierr = PetscFree(nprocs);CHKERRQ(ierr); 430 431 /* fix right hand side if needed */ 432 if (x && b) { 433 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 434 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 435 for (i=0; i<slen; i++) { 436 bb[lrows[i]] = diag*xx[lrows[i]]; 437 } 438 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 439 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 440 } 441 442 /* actually zap the local rows */ 443 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 444 if (diag != 0.0) { 445 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 446 PetscInt m = ll->lda, i; 447 448 for (i=0; i<slen; i++) { 449 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 450 } 451 } 452 ierr = PetscFree(lrows);CHKERRQ(ierr); 453 454 /* wait on sends */ 455 if (nsends) { 456 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 457 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 458 ierr = PetscFree(send_status);CHKERRQ(ierr); 459 } 460 ierr = PetscFree(send_waits);CHKERRQ(ierr); 461 ierr = PetscFree(svalues);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatMult_MPIDense" 467 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 468 { 469 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 474 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatMultAdd_MPIDense" 481 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 482 { 483 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 484 PetscErrorCode ierr; 485 486 PetscFunctionBegin; 487 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 489 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "MatMultTranspose_MPIDense" 495 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 496 { 497 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 498 PetscErrorCode ierr; 499 PetscScalar zero = 0.0; 500 501 PetscFunctionBegin; 502 ierr = VecSet(yy,zero);CHKERRQ(ierr); 503 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 504 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 505 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 511 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 512 { 513 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 514 PetscErrorCode ierr; 515 516 PetscFunctionBegin; 517 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 518 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 519 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 520 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatGetDiagonal_MPIDense" 526 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 527 { 528 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 529 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 530 PetscErrorCode ierr; 531 PetscInt len,i,n,m = A->rmap->n,radd; 532 PetscScalar *x,zero = 0.0; 533 534 PetscFunctionBegin; 535 ierr = VecSet(v,zero);CHKERRQ(ierr); 536 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 537 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 538 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 539 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 540 radd = A->rmap->rstart*m; 541 for (i=0; i<len; i++) { 542 x[i] = aloc->v[radd + i*m + i]; 543 } 544 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatDestroy_MPIDense" 550 PetscErrorCode MatDestroy_MPIDense(Mat mat) 551 { 552 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 #if defined(PETSC_USE_LOG) 557 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 558 #endif 559 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 560 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 561 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 562 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 563 564 ierr = PetscFree(mat->data);CHKERRQ(ierr); 565 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatView_MPIDense_Binary" 576 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 577 { 578 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 579 PetscErrorCode ierr; 580 PetscViewerFormat format; 581 int fd; 582 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 583 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 584 PetscScalar *work,*v,*vv; 585 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 586 587 PetscFunctionBegin; 588 if (mdn->size == 1) { 589 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 590 } else { 591 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 592 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 593 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 594 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_NATIVE) { 597 598 if (!rank) { 599 /* store the matrix as a dense matrix */ 600 header[0] = MAT_FILE_CLASSID; 601 header[1] = mat->rmap->N; 602 header[2] = N; 603 header[3] = MATRIX_BINARY_FORMAT_DENSE; 604 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 605 606 /* get largest work array needed for transposing array */ 607 mmax = mat->rmap->n; 608 for (i=1; i<size; i++) { 609 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 610 } 611 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 612 613 /* write out local array, by rows */ 614 m = mat->rmap->n; 615 v = a->v; 616 for (j=0; j<N; j++) { 617 for (i=0; i<m; i++) { 618 work[j + i*N] = *v++; 619 } 620 } 621 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 622 /* get largest work array to receive messages from other processes, excludes process zero */ 623 mmax = 0; 624 for (i=1; i<size; i++) { 625 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 626 } 627 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 628 for (k = 1; k < size; k++) { 629 v = vv; 630 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 631 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 632 633 for (j = 0; j < N; j++) { 634 for (i = 0; i < m; i++) { 635 work[j + i*N] = *v++; 636 } 637 } 638 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(work);CHKERRQ(ierr); 641 ierr = PetscFree(vv);CHKERRQ(ierr); 642 } else { 643 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 644 } 645 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 646 } 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 652 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 653 { 654 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 655 PetscErrorCode ierr; 656 PetscMPIInt size = mdn->size,rank = mdn->rank; 657 PetscViewerType vtype; 658 PetscBool iascii,isdraw; 659 PetscViewer sviewer; 660 PetscViewerFormat format; 661 662 PetscFunctionBegin; 663 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 664 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 665 if (iascii) { 666 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 667 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 668 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 669 MatInfo info; 670 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 671 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 672 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 673 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 674 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 675 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } else if (format == PETSC_VIEWER_ASCII_INFO) { 678 PetscFunctionReturn(0); 679 } 680 } else if (isdraw) { 681 PetscDraw draw; 682 PetscBool isnull; 683 684 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 685 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 686 if (isnull) PetscFunctionReturn(0); 687 } 688 689 if (size == 1) { 690 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 691 } else { 692 /* assemble the entire matrix onto first processor. */ 693 Mat A; 694 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 695 PetscInt *cols; 696 PetscScalar *vals; 697 698 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 699 if (!rank) { 700 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 701 } else { 702 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 703 } 704 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 705 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 706 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr); 707 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 708 709 /* Copy the matrix ... This isn't the most efficient means, 710 but it's quick for now */ 711 A->insertmode = INSERT_VALUES; 712 713 row = mat->rmap->rstart; 714 m = mdn->A->rmap->n; 715 for (i=0; i<m; i++) { 716 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 717 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 718 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 719 row++; 720 } 721 722 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 724 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 725 if (!rank) { 726 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 727 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 728 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 729 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 730 } 731 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 732 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 733 ierr = MatDestroy(&A);CHKERRQ(ierr); 734 } 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatView_MPIDense" 740 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 741 { 742 PetscErrorCode ierr; 743 PetscBool iascii,isbinary,isdraw,issocket; 744 745 PetscFunctionBegin; 746 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 747 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 748 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 749 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 750 751 if (iascii || issocket || isdraw) { 752 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 753 } else if (isbinary) { 754 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 755 } 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatGetInfo_MPIDense" 761 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 762 { 763 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 764 Mat mdn = mat->A; 765 PetscErrorCode ierr; 766 PetscReal isend[5],irecv[5]; 767 768 PetscFunctionBegin; 769 info->block_size = 1.0; 770 771 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 772 773 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 774 isend[3] = info->memory; isend[4] = info->mallocs; 775 if (flag == MAT_LOCAL) { 776 info->nz_used = isend[0]; 777 info->nz_allocated = isend[1]; 778 info->nz_unneeded = isend[2]; 779 info->memory = isend[3]; 780 info->mallocs = isend[4]; 781 } else if (flag == MAT_GLOBAL_MAX) { 782 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 783 784 info->nz_used = irecv[0]; 785 info->nz_allocated = irecv[1]; 786 info->nz_unneeded = irecv[2]; 787 info->memory = irecv[3]; 788 info->mallocs = irecv[4]; 789 } else if (flag == MAT_GLOBAL_SUM) { 790 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 791 792 info->nz_used = irecv[0]; 793 info->nz_allocated = irecv[1]; 794 info->nz_unneeded = irecv[2]; 795 info->memory = irecv[3]; 796 info->mallocs = irecv[4]; 797 } 798 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 799 info->fill_ratio_needed = 0; 800 info->factor_mallocs = 0; 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSetOption_MPIDense" 806 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 807 { 808 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 809 PetscErrorCode ierr; 810 811 PetscFunctionBegin; 812 switch (op) { 813 case MAT_NEW_NONZERO_LOCATIONS: 814 case MAT_NEW_NONZERO_LOCATION_ERR: 815 case MAT_NEW_NONZERO_ALLOCATION_ERR: 816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 817 break; 818 case MAT_ROW_ORIENTED: 819 a->roworiented = flg; 820 821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 822 break; 823 case MAT_NEW_DIAGONALS: 824 case MAT_KEEP_NONZERO_PATTERN: 825 case MAT_USE_HASH_TABLE: 826 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 827 break; 828 case MAT_IGNORE_OFF_PROC_ENTRIES: 829 a->donotstash = flg; 830 break; 831 case MAT_SYMMETRIC: 832 case MAT_STRUCTURALLY_SYMMETRIC: 833 case MAT_HERMITIAN: 834 case MAT_SYMMETRY_ETERNAL: 835 case MAT_IGNORE_LOWER_TRIANGULAR: 836 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 837 break; 838 default: 839 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 840 } 841 PetscFunctionReturn(0); 842 } 843 844 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatDiagonalScale_MPIDense" 847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 848 { 849 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 850 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 851 PetscScalar *l,*r,x,*v; 852 PetscErrorCode ierr; 853 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 854 855 PetscFunctionBegin; 856 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 857 if (ll) { 858 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 859 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 860 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 861 for (i=0; i<m; i++) { 862 x = l[i]; 863 v = mat->v + i; 864 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 865 } 866 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 867 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 868 } 869 if (rr) { 870 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 871 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 872 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 875 for (i=0; i<n; i++) { 876 x = r[i]; 877 v = mat->v + i*m; 878 for (j=0; j<m; j++) (*v++) *= x; 879 } 880 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 881 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatNorm_MPIDense" 888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 889 { 890 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 891 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 892 PetscErrorCode ierr; 893 PetscInt i,j; 894 PetscReal sum = 0.0; 895 PetscScalar *v = mat->v; 896 897 PetscFunctionBegin; 898 if (mdn->size == 1) { 899 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 900 } else { 901 if (type == NORM_FROBENIUS) { 902 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 903 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 904 } 905 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 906 *nrm = PetscSqrtReal(*nrm); 907 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 908 } else if (type == NORM_1) { 909 PetscReal *tmp,*tmp2; 910 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 911 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 912 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 913 *nrm = 0.0; 914 v = mat->v; 915 for (j=0; j<mdn->A->cmap->n; j++) { 916 for (i=0; i<mdn->A->rmap->n; i++) { 917 tmp[j] += PetscAbsScalar(*v); v++; 918 } 919 } 920 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 921 for (j=0; j<A->cmap->N; j++) { 922 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 923 } 924 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 925 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 926 } else if (type == NORM_INFINITY) { /* max row norm */ 927 PetscReal ntemp; 928 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 929 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 930 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 931 } 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatTranspose_MPIDense" 937 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 938 { 939 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 940 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 941 Mat B; 942 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 943 PetscErrorCode ierr; 944 PetscInt j,i; 945 PetscScalar *v; 946 947 PetscFunctionBegin; 948 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 949 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 950 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 951 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 952 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 953 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 954 } else { 955 B = *matout; 956 } 957 958 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 959 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 960 for (i=0; i<m; i++) rwork[i] = rstart + i; 961 for (j=0; j<n; j++) { 962 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 963 v += m; 964 } 965 ierr = PetscFree(rwork);CHKERRQ(ierr); 966 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 969 *matout = B; 970 } else { 971 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 972 } 973 PetscFunctionReturn(0); 974 } 975 976 977 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 978 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatSetUp_MPIDense" 982 PetscErrorCode MatSetUp_MPIDense(Mat A) 983 { 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatAXPY_MPIDense" 993 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 994 { 995 PetscErrorCode ierr; 996 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 997 998 PetscFunctionBegin; 999 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatConjugate_MPIDense" 1005 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1006 { 1007 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatRealPart_MPIDense" 1017 PetscErrorCode MatRealPart_MPIDense(Mat A) 1018 { 1019 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1029 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1030 { 1031 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1032 PetscErrorCode ierr; 1033 1034 PetscFunctionBegin; 1035 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1042 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1043 { 1044 PetscErrorCode ierr; 1045 PetscInt i,n; 1046 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1047 PetscReal *work; 1048 1049 PetscFunctionBegin; 1050 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 1051 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1052 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1053 if (type == NORM_2) { 1054 for (i=0; i<n; i++) work[i] *= work[i]; 1055 } 1056 if (type == NORM_INFINITY) { 1057 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1058 } else { 1059 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1060 } 1061 ierr = PetscFree(work);CHKERRQ(ierr); 1062 if (type == NORM_2) { 1063 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatSetRandom_MPIDense" 1070 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1071 { 1072 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1073 PetscErrorCode ierr; 1074 PetscScalar *a; 1075 PetscInt m,n,i; 1076 1077 PetscFunctionBegin; 1078 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1079 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1080 for (i=0; i<m*n; i++) { 1081 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1082 } 1083 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* -------------------------------------------------------------------*/ 1088 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1089 MatGetRow_MPIDense, 1090 MatRestoreRow_MPIDense, 1091 MatMult_MPIDense, 1092 /* 4*/ MatMultAdd_MPIDense, 1093 MatMultTranspose_MPIDense, 1094 MatMultTransposeAdd_MPIDense, 1095 0, 1096 0, 1097 0, 1098 /* 10*/ 0, 1099 0, 1100 0, 1101 0, 1102 MatTranspose_MPIDense, 1103 /* 15*/ MatGetInfo_MPIDense, 1104 MatEqual_MPIDense, 1105 MatGetDiagonal_MPIDense, 1106 MatDiagonalScale_MPIDense, 1107 MatNorm_MPIDense, 1108 /* 20*/ MatAssemblyBegin_MPIDense, 1109 MatAssemblyEnd_MPIDense, 1110 MatSetOption_MPIDense, 1111 MatZeroEntries_MPIDense, 1112 /* 24*/ MatZeroRows_MPIDense, 1113 0, 1114 0, 1115 0, 1116 0, 1117 /* 29*/ MatSetUp_MPIDense, 1118 0, 1119 0, 1120 0, 1121 0, 1122 /* 34*/ MatDuplicate_MPIDense, 1123 0, 1124 0, 1125 0, 1126 0, 1127 /* 39*/ MatAXPY_MPIDense, 1128 MatGetSubMatrices_MPIDense, 1129 0, 1130 MatGetValues_MPIDense, 1131 0, 1132 /* 44*/ 0, 1133 MatScale_MPIDense, 1134 0, 1135 0, 1136 0, 1137 /* 49*/ MatSetRandom_MPIDense, 1138 0, 1139 0, 1140 0, 1141 0, 1142 /* 54*/ 0, 1143 0, 1144 0, 1145 0, 1146 0, 1147 /* 59*/ MatGetSubMatrix_MPIDense, 1148 MatDestroy_MPIDense, 1149 MatView_MPIDense, 1150 0, 1151 0, 1152 /* 64*/ 0, 1153 0, 1154 0, 1155 0, 1156 0, 1157 /* 69*/ 0, 1158 0, 1159 0, 1160 0, 1161 0, 1162 /* 74*/ 0, 1163 0, 1164 0, 1165 0, 1166 0, 1167 /* 79*/ 0, 1168 0, 1169 0, 1170 0, 1171 /* 83*/ MatLoad_MPIDense, 1172 0, 1173 0, 1174 0, 1175 0, 1176 0, 1177 /* 89*/ 1178 0, 1179 0, 1180 0, 1181 0, 1182 0, 1183 /* 94*/ 0, 1184 0, 1185 0, 1186 0, 1187 0, 1188 /* 99*/ 0, 1189 0, 1190 0, 1191 MatConjugate_MPIDense, 1192 0, 1193 /*104*/ 0, 1194 MatRealPart_MPIDense, 1195 MatImaginaryPart_MPIDense, 1196 0, 1197 0, 1198 /*109*/ 0, 1199 0, 1200 0, 1201 0, 1202 0, 1203 /*114*/ 0, 1204 0, 1205 0, 1206 0, 1207 0, 1208 /*119*/ 0, 1209 0, 1210 0, 1211 0, 1212 0, 1213 /*124*/ 0, 1214 MatGetColumnNorms_MPIDense 1215 }; 1216 1217 EXTERN_C_BEGIN 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1220 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1221 { 1222 Mat_MPIDense *a; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 mat->preallocated = PETSC_TRUE; 1227 /* Note: For now, when data is specified above, this assumes the user correctly 1228 allocates the local dense storage space. We should add error checking. */ 1229 1230 a = (Mat_MPIDense*)mat->data; 1231 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1232 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1233 a->nvec = mat->cmap->n; 1234 1235 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1236 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1237 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1238 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1239 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 EXTERN_C_END 1243 1244 EXTERN_C_BEGIN 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatCreate_MPIDense" 1247 PetscErrorCode MatCreate_MPIDense(Mat mat) 1248 { 1249 Mat_MPIDense *a; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1254 mat->data = (void*)a; 1255 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1256 1257 mat->insertmode = NOT_SET_VALUES; 1258 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1259 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1260 1261 /* build cache for off array entries formed */ 1262 a->donotstash = PETSC_FALSE; 1263 1264 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1265 1266 /* stuff used for matrix vector multiply */ 1267 a->lvec = 0; 1268 a->Mvctx = 0; 1269 a->roworiented = PETSC_TRUE; 1270 1271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1273 1274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1275 "MatGetDiagonalBlock_MPIDense", 1276 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1278 "MatMPIDenseSetPreallocation_MPIDense", 1279 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1281 "MatMatMult_MPIAIJ_MPIDense", 1282 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1284 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1285 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1287 "MatMatMultNumeric_MPIAIJ_MPIDense", 1288 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1289 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 EXTERN_C_END 1293 1294 /*MC 1295 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1296 1297 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1298 and MATMPIDENSE otherwise. 1299 1300 Options Database Keys: 1301 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1302 1303 Level: beginner 1304 1305 1306 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1307 M*/ 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1311 /*@C 1312 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1313 1314 Not collective 1315 1316 Input Parameters: 1317 . A - the matrix 1318 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1319 to control all matrix memory allocation. 1320 1321 Notes: 1322 The dense format is fully compatible with standard Fortran 77 1323 storage by columns. 1324 1325 The data input variable is intended primarily for Fortran programmers 1326 who wish to allocate their own matrix memory space. Most users should 1327 set data=PETSC_NULL. 1328 1329 Level: intermediate 1330 1331 .keywords: matrix,dense, parallel 1332 1333 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1334 @*/ 1335 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1336 { 1337 PetscErrorCode ierr; 1338 1339 PetscFunctionBegin; 1340 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatCreateDense" 1346 /*@C 1347 MatCreateDense - Creates a parallel matrix in dense format. 1348 1349 Collective on MPI_Comm 1350 1351 Input Parameters: 1352 + comm - MPI communicator 1353 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1354 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1355 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1356 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1357 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1358 to control all matrix memory allocation. 1359 1360 Output Parameter: 1361 . A - the matrix 1362 1363 Notes: 1364 The dense format is fully compatible with standard Fortran 77 1365 storage by columns. 1366 1367 The data input variable is intended primarily for Fortran programmers 1368 who wish to allocate their own matrix memory space. Most users should 1369 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1370 1371 The user MUST specify either the local or global matrix dimensions 1372 (possibly both). 1373 1374 Level: intermediate 1375 1376 .keywords: matrix,dense, parallel 1377 1378 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1379 @*/ 1380 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1381 { 1382 PetscErrorCode ierr; 1383 PetscMPIInt size; 1384 1385 PetscFunctionBegin; 1386 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1387 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1388 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1389 if (size > 1) { 1390 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1391 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1392 } else { 1393 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1394 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatDuplicate_MPIDense" 1401 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1402 { 1403 Mat mat; 1404 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1405 PetscErrorCode ierr; 1406 1407 PetscFunctionBegin; 1408 *newmat = 0; 1409 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1410 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1411 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1412 a = (Mat_MPIDense*)mat->data; 1413 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1414 1415 mat->factortype = A->factortype; 1416 mat->assembled = PETSC_TRUE; 1417 mat->preallocated = PETSC_TRUE; 1418 1419 a->size = oldmat->size; 1420 a->rank = oldmat->rank; 1421 mat->insertmode = NOT_SET_VALUES; 1422 a->nvec = oldmat->nvec; 1423 a->donotstash = oldmat->donotstash; 1424 1425 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1426 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1427 1428 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1429 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1430 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1431 1432 *newmat = mat; 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1438 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1439 { 1440 PetscErrorCode ierr; 1441 PetscMPIInt rank,size; 1442 PetscInt *rowners,i,m,nz,j; 1443 PetscScalar *array,*vals,*vals_ptr; 1444 1445 PetscFunctionBegin; 1446 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1447 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1448 1449 /* determine ownership of all rows */ 1450 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1451 else m = newmat->rmap->n; 1452 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1453 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1454 rowners[0] = 0; 1455 for (i=2; i<=size; i++) { 1456 rowners[i] += rowners[i-1]; 1457 } 1458 1459 if (!sizesset) { 1460 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1461 } 1462 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1463 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1464 1465 if (!rank) { 1466 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1467 1468 /* read in my part of the matrix numerical values */ 1469 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1470 1471 /* insert into matrix-by row (this is why cannot directly read into array */ 1472 vals_ptr = vals; 1473 for (i=0; i<m; i++) { 1474 for (j=0; j<N; j++) { 1475 array[i + j*m] = *vals_ptr++; 1476 } 1477 } 1478 1479 /* read in other processors and ship out */ 1480 for (i=1; i<size; i++) { 1481 nz = (rowners[i+1] - rowners[i])*N; 1482 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1483 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1484 } 1485 } else { 1486 /* receive numeric values */ 1487 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1488 1489 /* receive message of values*/ 1490 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1491 1492 /* insert into matrix-by row (this is why cannot directly read into array */ 1493 vals_ptr = vals; 1494 for (i=0; i<m; i++) { 1495 for (j=0; j<N; j++) { 1496 array[i + j*m] = *vals_ptr++; 1497 } 1498 } 1499 } 1500 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1501 ierr = PetscFree(rowners);CHKERRQ(ierr); 1502 ierr = PetscFree(vals);CHKERRQ(ierr); 1503 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatLoad_MPIDense" 1510 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1511 { 1512 PetscScalar *vals,*svals; 1513 MPI_Comm comm = ((PetscObject)viewer)->comm; 1514 MPI_Status status; 1515 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1516 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1517 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1518 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1519 int fd; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1524 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1525 if (!rank) { 1526 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1527 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1528 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1529 } 1530 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1531 1532 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1533 M = header[1]; N = header[2]; nz = header[3]; 1534 1535 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1536 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1537 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1538 1539 /* If global sizes are set, check if they are consistent with that given in the file */ 1540 if (sizesset) { 1541 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1542 } 1543 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1544 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1545 1546 /* 1547 Handle case where matrix is stored on disk as a dense matrix 1548 */ 1549 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1550 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 /* determine ownership of all rows */ 1555 if (newmat->rmap->n < 0) { 1556 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1559 } 1560 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1561 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1562 rowners[0] = 0; 1563 for (i=2; i<=size; i++) { 1564 rowners[i] += rowners[i-1]; 1565 } 1566 rstart = rowners[rank]; 1567 rend = rowners[rank+1]; 1568 1569 /* distribute row lengths to all processors */ 1570 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1571 if (!rank) { 1572 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1573 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1574 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1575 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1576 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1577 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1578 } else { 1579 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1580 } 1581 1582 if (!rank) { 1583 /* calculate the number of nonzeros on each processor */ 1584 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1585 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1586 for (i=0; i<size; i++) { 1587 for (j=rowners[i]; j< rowners[i+1]; j++) { 1588 procsnz[i] += rowlengths[j]; 1589 } 1590 } 1591 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1592 1593 /* determine max buffer needed and allocate it */ 1594 maxnz = 0; 1595 for (i=0; i<size; i++) { 1596 maxnz = PetscMax(maxnz,procsnz[i]); 1597 } 1598 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1599 1600 /* read in my part of the matrix column indices */ 1601 nz = procsnz[0]; 1602 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1603 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1604 1605 /* read in every one elses and ship off */ 1606 for (i=1; i<size; i++) { 1607 nz = procsnz[i]; 1608 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1609 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1610 } 1611 ierr = PetscFree(cols);CHKERRQ(ierr); 1612 } else { 1613 /* determine buffer space needed for message */ 1614 nz = 0; 1615 for (i=0; i<m; i++) { 1616 nz += ourlens[i]; 1617 } 1618 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1619 1620 /* receive message of column indices*/ 1621 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1622 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1623 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1624 } 1625 1626 /* loop over local rows, determining number of off diagonal entries */ 1627 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1628 jj = 0; 1629 for (i=0; i<m; i++) { 1630 for (j=0; j<ourlens[i]; j++) { 1631 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1632 jj++; 1633 } 1634 } 1635 1636 /* create our matrix */ 1637 for (i=0; i<m; i++) ourlens[i] -= offlens[i]; 1638 1639 if (!sizesset) { 1640 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1641 } 1642 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1643 for (i=0; i<m; i++) ourlens[i] += offlens[i]; 1644 1645 if (!rank) { 1646 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1647 1648 /* read in my part of the matrix numerical values */ 1649 nz = procsnz[0]; 1650 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1651 1652 /* insert into matrix */ 1653 jj = rstart; 1654 smycols = mycols; 1655 svals = vals; 1656 for (i=0; i<m; i++) { 1657 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1658 smycols += ourlens[i]; 1659 svals += ourlens[i]; 1660 jj++; 1661 } 1662 1663 /* read in other processors and ship out */ 1664 for (i=1; i<size; i++) { 1665 nz = procsnz[i]; 1666 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1667 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1668 } 1669 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1670 } else { 1671 /* receive numeric values */ 1672 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1673 1674 /* receive message of values*/ 1675 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1676 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1677 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1678 1679 /* insert into matrix */ 1680 jj = rstart; 1681 smycols = mycols; 1682 svals = vals; 1683 for (i=0; i<m; i++) { 1684 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1685 smycols += ourlens[i]; 1686 svals += ourlens[i]; 1687 jj++; 1688 } 1689 } 1690 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 1691 ierr = PetscFree(vals);CHKERRQ(ierr); 1692 ierr = PetscFree(mycols);CHKERRQ(ierr); 1693 ierr = PetscFree(rowners);CHKERRQ(ierr); 1694 1695 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1696 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatEqual_MPIDense" 1702 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1703 { 1704 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1705 Mat a,b; 1706 PetscBool flg; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 a = matA->A; 1711 b = matB->A; 1712 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1713 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 1717