1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) *B = mat->A; 34 else *B = A; 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRow_MPIDense" 40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 41 { 42 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 43 PetscErrorCode ierr; 44 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 45 46 PetscFunctionBegin; 47 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 48 lrow = row - rstart; 49 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatRestoreRow_MPIDense" 55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 56 { 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 61 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 62 PetscFunctionReturn(0); 63 } 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 68 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 69 { 70 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 71 PetscErrorCode ierr; 72 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 73 PetscScalar *array; 74 MPI_Comm comm; 75 Mat B; 76 77 PetscFunctionBegin; 78 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 79 80 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 81 if (!B) { 82 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 83 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 84 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 85 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 86 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 87 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 88 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 89 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 90 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 92 *a = B; 93 ierr = MatDestroy(&B);CHKERRQ(ierr); 94 } else *a = B; 95 PetscFunctionReturn(0); 96 } 97 EXTERN_C_END 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatSetValues_MPIDense" 101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 102 { 103 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 104 PetscErrorCode ierr; 105 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 106 PetscBool roworiented = 
A->roworiented; 107 108 PetscFunctionBegin; 109 if (v) PetscValidScalarPointer(v,6); 110 for (i=0; i<m; i++) { 111 if (idxm[i] < 0) continue; 112 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 113 if (idxm[i] >= rstart && idxm[i] < rend) { 114 row = idxm[i] - rstart; 115 if (roworiented) { 116 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 117 } else { 118 for (j=0; j<n; j++) { 119 if (idxn[j] < 0) continue; 120 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 121 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 122 } 123 } 124 } else if (!A->donotstash) { 125 mat->assembled = PETSC_FALSE; 126 if (roworiented) { 127 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 128 } else { 129 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 130 } 131 } 132 } 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatGetValues_MPIDense" 138 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 139 { 140 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 141 PetscErrorCode ierr; 142 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 143 144 PetscFunctionBegin; 145 for (i=0; i<m; i++) { 146 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 147 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 148 if (idxm[i] >= rstart && idxm[i] < rend) { 149 row = idxm[i] - rstart; 150 for (j=0; j<n; j++) { 151 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 152 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 153 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 154 } 155 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 156 } 157 PetscFunctionReturn(0); 158 } 159 160 #undef __FUNCT__ 161 #define __FUNCT__ "MatDenseGetArray_MPIDense" 162 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 163 { 164 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 165 PetscErrorCode ierr; 166 167 PetscFunctionBegin; 168 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 174 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 175 { 176 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 177 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 178 PetscErrorCode ierr; 179 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 180 const PetscInt *irow,*icol; 181 PetscScalar *av,*bv,*v = lmat->v; 182 Mat newmat; 183 IS iscol_local; 184 185 PetscFunctionBegin; 186 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 187 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 188 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 189 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 190 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 191 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 192 193 /* No parallel redistribution currently supported! Should really check each index set 194 to comfirm that it is OK. ... 
Currently supports only submatrix same partitioning as 195 original matrix! */ 196 197 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 198 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 199 200 /* Check submatrix call */ 201 if (scall == MAT_REUSE_MATRIX) { 202 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 203 /* Really need to test rows and column sizes! */ 204 newmat = *B; 205 } else { 206 /* Create and fill new matrix */ 207 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 208 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 209 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 210 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 211 } 212 213 /* Now extract the data pointers and do the copy, column at a time */ 214 newmatd = (Mat_MPIDense*)newmat->data; 215 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 216 217 for (i=0; i<Ncols; i++) { 218 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 219 for (j=0; j<nrows; j++) { 220 *bv++ = av[irow[j] - rstart]; 221 } 222 } 223 224 /* Assemble the matrices so that the correct flags are set */ 225 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 226 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 227 228 /* Free work space */ 229 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 230 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 231 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 232 *B = newmat; 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 238 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 239 { 240 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 241 PetscErrorCode ierr; 242 243 PetscFunctionBegin; 244 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 250 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 251 { 252 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 253 MPI_Comm comm; 254 PetscErrorCode ierr; 255 PetscInt nstash,reallocs; 256 InsertMode addv; 257 258 PetscFunctionBegin; 259 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 260 /* make sure all processors are either in INSERTMODE or ADDMODE */ 261 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 262 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 263 mat->insertmode = addv; /* in case this processor had no cache */ 264 265 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 266 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 267 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 273 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 274 { 275 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,*row,*col,flg,j,rstart,ncols; 278 PetscMPIInt n; 279 PetscScalar *val; 280 InsertMode addv=mat->insertmode; 281 282 PetscFunctionBegin; 283 /* wait on receives */ 284 while (1) { 285 ierr = 
MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access the
   local matrix mdn->A directly and not go through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr =
MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 377 } 378 379 /* do sends: 380 1) starts[i] gives the starting index in svalues for stuff going to 381 the ith processor 382 */ 383 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 384 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 385 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 386 387 starts[0] = 0; 388 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 389 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 390 391 starts[0] = 0; 392 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 393 count = 0; 394 for (i=0; i<size; i++) { 395 if (nprocs[2*i+1]) { 396 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 397 } 398 } 399 ierr = PetscFree(starts);CHKERRQ(ierr); 400 401 base = owners[rank]; 402 403 /* wait on receives */ 404 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 405 count = nrecvs; 406 slen = 0; 407 while (count) { 408 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 409 /* unpack receives into our local space */ 410 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 411 412 source[imdex] = recv_status.MPI_SOURCE; 413 lens[imdex] = n; 414 slen += n; 415 count--; 416 } 417 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 418 419 /* move the data into the send scatter */ 420 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 421 count = 0; 422 for (i=0; i<nrecvs; i++) { 423 values = rvalues + i*nmax; 424 for (j=0; j<lens[i]; j++) { 425 lrows[count++] = values[j] - base; 426 } 427 } 428 ierr = PetscFree(rvalues);CHKERRQ(ierr); 429 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 430 ierr = PetscFree(owner);CHKERRQ(ierr); 431 ierr = PetscFree(nprocs);CHKERRQ(ierr); 432 433 /* fix right hand side if needed */ 434 if (x && b) { 435 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 436 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 437 for (i=0; i<slen; i++) { 438 bb[lrows[i]] = diag*xx[lrows[i]]; 439 } 440 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 441 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 442 } 443 444 /* actually zap the local rows */ 445 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 446 if (diag != 0.0) { 447 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 448 PetscInt m = ll->lda, i; 449 450 for (i=0; i<slen; i++) { 451 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 452 } 453 } 454 ierr = PetscFree(lrows);CHKERRQ(ierr); 455 456 /* wait on sends */ 457 if (nsends) { 458 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 459 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 460 ierr = PetscFree(send_status);CHKERRQ(ierr); 461 } 462 ierr = PetscFree(send_waits);CHKERRQ(ierr); 463 ierr = PetscFree(svalues);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatMult_MPIDense" 469 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 470 { 471 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 472 PetscErrorCode ierr; 473 474 PetscFunctionBegin; 475 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 
#define __FUNCT__ "MatMultAdd_MPIDense" 483 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 484 { 485 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 490 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatMultTranspose_MPIDense" 497 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 498 { 499 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 500 PetscErrorCode ierr; 501 PetscScalar zero = 0.0; 502 503 PetscFunctionBegin; 504 ierr = VecSet(yy,zero);CHKERRQ(ierr); 505 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 506 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 507 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 513 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 514 { 515 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 516 PetscErrorCode ierr; 517 518 PetscFunctionBegin; 519 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 520 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 522 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatGetDiagonal_MPIDense" 528 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 529 { 530 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 531 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 532 PetscErrorCode ierr; 533 PetscInt len,i,n,m = A->rmap->n,radd; 534 PetscScalar *x,zero = 0.0; 535 536 PetscFunctionBegin; 537 ierr = VecSet(v,zero);CHKERRQ(ierr); 538 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 539 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 540 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 541 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 542 radd = A->rmap->rstart*m; 543 for (i=0; i<len; i++) { 544 x[i] = aloc->v[radd + i*m + i]; 545 } 546 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatDestroy_MPIDense" 552 PetscErrorCode MatDestroy_MPIDense(Mat mat) 553 { 554 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 555 PetscErrorCode ierr; 556 557 PetscFunctionBegin; 558 #if defined(PETSC_USE_LOG) 559 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 560 #endif 561 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 562 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 563 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 564 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 565 566 ierr = PetscFree(mat->data);CHKERRQ(ierr); 567 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr); 571 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatView_MPIDense_Binary" 578 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 579 { 580 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 581 PetscErrorCode ierr; 582 PetscViewerFormat format; 583 int fd; 584 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 585 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 586 PetscScalar *work,*v,*vv; 587 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 588 589 PetscFunctionBegin; 590 if (mdn->size == 1) { 591 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 592 } else { 593 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 594 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 595 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 596 597 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 598 if (format == PETSC_VIEWER_NATIVE) { 599 600 if (!rank) { 601 /* store the matrix as a dense matrix */ 602 header[0] = MAT_FILE_CLASSID; 603 header[1] = mat->rmap->N; 604 header[2] = N; 605 header[3] = MATRIX_BINARY_FORMAT_DENSE; 606 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 607 608 /* get largest work array needed for transposing array */ 609 mmax = mat->rmap->n; 610 for (i=1; i<size; i++) { 611 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 612 } 613 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 614 615 /* write out local array, by rows */ 616 m = mat->rmap->n; 617 v = a->v; 618 for (j=0; j<N; j++) { 619 for (i=0; i<m; i++) { 620 work[j + i*N] = *v++; 621 } 622 } 623 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 624 /* get largest work array to receive messages from other processes, excludes process zero */ 625 mmax = 0; 626 for (i=1; i<size; i++) { 627 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 628 } 629 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 630 for (k = 1; k < size; k++) { 631 v = vv; 632 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 633 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 634 635 for (j = 0; j < N; j++) { 636 for (i = 0; i < m; i++) { 637 work[j + i*N] = *v++; 638 } 639 } 640 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 641 } 642 ierr = PetscFree(work);CHKERRQ(ierr); 643 ierr = PetscFree(vv);CHKERRQ(ierr); 644 } else { 645 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 646 } 647 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 648 } 649 PetscFunctionReturn(0); 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 654 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 655 { 656 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 657 PetscErrorCode ierr; 658 PetscMPIInt size = mdn->size,rank = mdn->rank; 659 PetscViewerType vtype; 660 PetscBool iascii,isdraw; 661 PetscViewer sviewer; 662 PetscViewerFormat format; 663 664 PetscFunctionBegin; 665 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 666 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 667 if (iascii) { 668 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 669 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 670 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 671 MatInfo info; 672 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 673 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 674 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 675 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 676 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 677 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } else if (format == PETSC_VIEWER_ASCII_INFO) { 680 PetscFunctionReturn(0); 681 } 682 } else if (isdraw) { 683 PetscDraw draw; 684 PetscBool isnull; 685 686 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 687 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 688 if (isnull) PetscFunctionReturn(0); 689 } 690 691 if (size == 1) { 692 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 693 } else { 694 /* assemble the entire matrix onto first processor. */ 695 Mat A; 696 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 697 PetscInt *cols; 698 PetscScalar *vals; 699 700 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 701 if (!rank) { 702 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 703 } else { 704 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 705 } 706 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 707 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 708 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 709 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 710 711 /* Copy the matrix ... 
This isn't the most efficient means, 712 but it's quick for now */ 713 A->insertmode = INSERT_VALUES; 714 715 row = mat->rmap->rstart; 716 m = mdn->A->rmap->n; 717 for (i=0; i<m; i++) { 718 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 719 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 720 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 721 row++; 722 } 723 724 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 725 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 726 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 727 if (!rank) { 728 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 729 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 730 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 731 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 732 } 733 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 734 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 735 ierr = MatDestroy(&A);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIDense" 742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 743 { 744 PetscErrorCode ierr; 745 PetscBool iascii,isbinary,isdraw,issocket; 746 747 PetscFunctionBegin; 748 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 749 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 752 753 if (iascii || issocket || isdraw) { 754 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 755 } else if (isbinary) { 756 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 757 } 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNCT__ 762 #define __FUNCT__ "MatGetInfo_MPIDense" 763 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 764 { 765 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 766 Mat mdn = mat->A; 767 PetscErrorCode ierr; 768 PetscReal isend[5],irecv[5]; 769 770 PetscFunctionBegin; 771 info->block_size = 1.0; 772 773 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 774 775 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 776 isend[3] = info->memory; isend[4] = info->mallocs; 777 if (flag == MAT_LOCAL) { 778 info->nz_used = isend[0]; 779 info->nz_allocated = isend[1]; 780 info->nz_unneeded = isend[2]; 781 info->memory = isend[3]; 782 info->mallocs = isend[4]; 783 } else if (flag == MAT_GLOBAL_MAX) { 784 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 785 786 info->nz_used = irecv[0]; 787 info->nz_allocated = irecv[1]; 788 info->nz_unneeded = irecv[2]; 789 info->memory = irecv[3]; 790 info->mallocs = irecv[4]; 791 } else if (flag == MAT_GLOBAL_SUM) { 792 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 793 794 info->nz_used = irecv[0]; 795 info->nz_allocated = irecv[1]; 796 info->nz_unneeded = irecv[2]; 797 info->memory = irecv[3]; 798 info->mallocs = irecv[4]; 799 } 800 info->fill_ratio_given = 0; /* no 
parallel LU/ILU/Cholesky */ 801 info->fill_ratio_needed = 0; 802 info->factor_mallocs = 0; 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatSetOption_MPIDense" 808 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 809 { 810 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 switch (op) { 815 case MAT_NEW_NONZERO_LOCATIONS: 816 case MAT_NEW_NONZERO_LOCATION_ERR: 817 case MAT_NEW_NONZERO_ALLOCATION_ERR: 818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 819 break; 820 case MAT_ROW_ORIENTED: 821 a->roworiented = flg; 822 823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 824 break; 825 case MAT_NEW_DIAGONALS: 826 case MAT_KEEP_NONZERO_PATTERN: 827 case MAT_USE_HASH_TABLE: 828 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 829 break; 830 case MAT_IGNORE_OFF_PROC_ENTRIES: 831 a->donotstash = flg; 832 break; 833 case MAT_SYMMETRIC: 834 case MAT_STRUCTURALLY_SYMMETRIC: 835 case MAT_HERMITIAN: 836 case MAT_SYMMETRY_ETERNAL: 837 case MAT_IGNORE_LOWER_TRIANGULAR: 838 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 839 break; 840 default: 841 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 842 } 843 PetscFunctionReturn(0); 844 } 845 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatDiagonalScale_MPIDense" 849 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 850 { 851 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 852 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 853 PetscScalar *l,*r,x,*v; 854 PetscErrorCode ierr; 855 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 856 857 PetscFunctionBegin; 858 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 859 if (ll) { 860 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 861 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 862 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 863 for (i=0; i<m; i++) { 864 x = l[i]; 865 v = mat->v + i; 866 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 867 } 868 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 869 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 870 } 871 if (rr) { 872 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 873 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 874 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 877 for (i=0; i<n; i++) { 878 x = r[i]; 879 v = mat->v + i*m; 880 for (j=0; j<m; j++) (*v++) *= x; 881 } 882 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 883 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 884 } 885 PetscFunctionReturn(0); 886 } 887 888 #undef __FUNCT__ 889 #define __FUNCT__ "MatNorm_MPIDense" 890 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 891 { 892 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 893 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 894 PetscErrorCode ierr; 895 PetscInt i,j; 896 PetscReal sum = 0.0; 897 PetscScalar *v = mat->v; 898 899 PetscFunctionBegin; 900 if (mdn->size == 1) { 901 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 902 } else { 903 if (type == NORM_FROBENIUS) { 904 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 905 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 906 } 907 ierr = 
MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); /* free both halves of the PetscMalloc2() pair */
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B
= (Mat_MPIDense*)X->data; 999 1000 PetscFunctionBegin; 1001 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatConjugate_MPIDense" 1007 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1008 { 1009 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1010 PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatRealPart_MPIDense" 1019 PetscErrorCode MatRealPart_MPIDense(Mat A) 1020 { 1021 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1022 PetscErrorCode ierr; 1023 1024 PetscFunctionBegin; 1025 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1031 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1032 { 1033 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1034 PetscErrorCode ierr; 1035 1036 PetscFunctionBegin; 1037 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1044 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1045 { 1046 PetscErrorCode ierr; 1047 PetscInt i,n; 1048 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1049 PetscReal *work; 1050 1051 PetscFunctionBegin; 1052 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1053 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1054 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1055 if (type == NORM_2) { 1056 for (i=0; i<n; i++) work[i] *= work[i]; 1057 } 1058 if (type == NORM_INFINITY) { 1059 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1060 } else { 1061 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1062 } 1063 ierr = PetscFree(work);CHKERRQ(ierr); 1064 if (type == NORM_2) { 1065 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatSetRandom_MPIDense" 1072 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1073 { 1074 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1075 PetscErrorCode ierr; 1076 PetscScalar *a; 1077 PetscInt m,n,i; 1078 1079 PetscFunctionBegin; 1080 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1081 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1082 for (i=0; i<m*n; i++) { 1083 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1084 } 1085 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 /* -------------------------------------------------------------------*/ 1090 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1091 MatGetRow_MPIDense, 1092 MatRestoreRow_MPIDense, 1093 MatMult_MPIDense, 1094 /* 4*/ MatMultAdd_MPIDense, 1095 MatMultTranspose_MPIDense, 1096 MatMultTransposeAdd_MPIDense, 1097 0, 1098 0, 1099 0, 1100 /* 10*/ 0, 1101 0, 1102 0, 1103 0, 1104 MatTranspose_MPIDense, 1105 /* 15*/ MatGetInfo_MPIDense, 1106 MatEqual_MPIDense, 1107 MatGetDiagonal_MPIDense, 1108 MatDiagonalScale_MPIDense, 1109 MatNorm_MPIDense, 1110 /* 20*/ MatAssemblyBegin_MPIDense, 1111 MatAssemblyEnd_MPIDense, 1112 MatSetOption_MPIDense, 1113 MatZeroEntries_MPIDense, 1114 /* 24*/ MatZeroRows_MPIDense, 1115 0, 1116 0, 1117 0, 1118 0, 1119 /* 29*/ MatSetUp_MPIDense, 1120 0, 
1121 0, 1122 0, 1123 0, 1124 /* 34*/ MatDuplicate_MPIDense, 1125 0, 1126 0, 1127 0, 1128 0, 1129 /* 39*/ MatAXPY_MPIDense, 1130 MatGetSubMatrices_MPIDense, 1131 0, 1132 MatGetValues_MPIDense, 1133 0, 1134 /* 44*/ 0, 1135 MatScale_MPIDense, 1136 0, 1137 0, 1138 0, 1139 /* 49*/ MatSetRandom_MPIDense, 1140 0, 1141 0, 1142 0, 1143 0, 1144 /* 54*/ 0, 1145 0, 1146 0, 1147 0, 1148 0, 1149 /* 59*/ MatGetSubMatrix_MPIDense, 1150 MatDestroy_MPIDense, 1151 MatView_MPIDense, 1152 0, 1153 0, 1154 /* 64*/ 0, 1155 0, 1156 0, 1157 0, 1158 0, 1159 /* 69*/ 0, 1160 0, 1161 0, 1162 0, 1163 0, 1164 /* 74*/ 0, 1165 0, 1166 0, 1167 0, 1168 0, 1169 /* 79*/ 0, 1170 0, 1171 0, 1172 0, 1173 /* 83*/ MatLoad_MPIDense, 1174 0, 1175 0, 1176 0, 1177 0, 1178 0, 1179 /* 89*/ 1180 0, 1181 0, 1182 0, 1183 0, 1184 0, 1185 /* 94*/ 0, 1186 0, 1187 0, 1188 0, 1189 0, 1190 /* 99*/ 0, 1191 0, 1192 0, 1193 MatConjugate_MPIDense, 1194 0, 1195 /*104*/ 0, 1196 MatRealPart_MPIDense, 1197 MatImaginaryPart_MPIDense, 1198 0, 1199 0, 1200 /*109*/ 0, 1201 0, 1202 0, 1203 0, 1204 0, 1205 /*114*/ 0, 1206 0, 1207 0, 1208 0, 1209 0, 1210 /*119*/ 0, 1211 0, 1212 0, 1213 0, 1214 0, 1215 /*124*/ 0, 1216 MatGetColumnNorms_MPIDense 1217 }; 1218 1219 EXTERN_C_BEGIN 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1222 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1223 { 1224 Mat_MPIDense *a; 1225 PetscErrorCode ierr; 1226 1227 PetscFunctionBegin; 1228 mat->preallocated = PETSC_TRUE; 1229 /* Note: For now, when data is specified above, this assumes the user correctly 1230 allocates the local dense storage space. We should add error checking. */ 1231 1232 a = (Mat_MPIDense*)mat->data; 1233 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1234 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1235 a->nvec = mat->cmap->n; 1236 1237 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1238 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1239 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1240 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1241 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 EXTERN_C_END 1245 1246 EXTERN_C_BEGIN 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatCreate_MPIDense" 1249 PetscErrorCode MatCreate_MPIDense(Mat mat) 1250 { 1251 Mat_MPIDense *a; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1256 mat->data = (void*)a; 1257 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1258 1259 mat->insertmode = NOT_SET_VALUES; 1260 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1261 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1262 1263 /* build cache for off array entries formed */ 1264 a->donotstash = PETSC_FALSE; 1265 1266 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1267 1268 /* stuff used for matrix vector multiply */ 1269 a->lvec = 0; 1270 a->Mvctx = 0; 1271 a->roworiented = PETSC_TRUE; 1272 1273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1275 1276 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1277 "MatGetDiagonalBlock_MPIDense", 1278 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1280 "MatMPIDenseSetPreallocation_MPIDense", 1281 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1283 "MatMatMult_MPIAIJ_MPIDense", 1284 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1286 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1287 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1289 "MatMatMultNumeric_MPIAIJ_MPIDense", 1290 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1291 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 EXTERN_C_END 1295 1296 /*MC 1297 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1298 1299 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1300 and MATMPIDENSE otherwise. 1301 1302 Options Database Keys: 1303 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1304 1305 Level: beginner 1306 1307 1308 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1309 M*/ 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1313 /*@C 1314 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1315 1316 Not collective 1317 1318 Input Parameters: 1319 . A - the matrix 1320 - data - optional location of matrix data. Set data=NULL for PETSc 1321 to control all matrix memory allocation. 1322 1323 Notes: 1324 The dense format is fully compatible with standard Fortran 77 1325 storage by columns. 1326 1327 The data input variable is intended primarily for Fortran programmers 1328 who wish to allocate their own matrix memory space. Most users should 1329 set data=NULL. 1330 1331 Level: intermediate 1332 1333 .keywords: matrix,dense, parallel 1334 1335 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1336 @*/ 1337 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1338 { 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatCreateDense" 1348 /*@C 1349 MatCreateDense - Creates a parallel matrix in dense format. 1350 1351 Collective on MPI_Comm 1352 1353 Input Parameters: 1354 + comm - MPI communicator 1355 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1356 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1357 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1358 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1359 - data - optional location of matrix data. Set data=NULL (NULL_SCALAR for Fortran users) for PETSc 1360 to control all matrix memory allocation. 1361 1362 Output Parameter: 1363 . A - the matrix 1364 1365 Notes: 1366 The dense format is fully compatible with standard Fortran 77 1367 storage by columns. 
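
   A minimal usage sketch (illustrative only: M and N stand for the desired global sizes,
   i, j and value for an index pair and an entry, and error checking with CHKERRQ() is
   omitted):

.vb
      Mat A;
      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
      MatSetValues(A,1,&i,1,&j,&value,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(&A);
.ve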
1368 1369 The data input variable is intended primarily for Fortran programmers 1370 who wish to allocate their own matrix memory space. Most users should 1371 set data=NULL (NULL_SCALAR for Fortran users). 1372 1373 The user MUST specify either the local or global matrix dimensions 1374 (possibly both). 1375 1376 Level: intermediate 1377 1378 .keywords: matrix,dense, parallel 1379 1380 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1381 @*/ 1382 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1383 { 1384 PetscErrorCode ierr; 1385 PetscMPIInt size; 1386 1387 PetscFunctionBegin; 1388 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1389 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1390 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1391 if (size > 1) { 1392 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1393 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1394 } else { 1395 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1396 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatDuplicate_MPIDense" 1403 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1404 { 1405 Mat mat; 1406 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1407 PetscErrorCode ierr; 1408 1409 PetscFunctionBegin; 1410 *newmat = 0; 1411 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1412 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1413 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1414 a = (Mat_MPIDense*)mat->data; 1415 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1416 1417 mat->factortype = A->factortype; 1418 mat->assembled = PETSC_TRUE; 1419 mat->preallocated = PETSC_TRUE; 1420 1421 a->size = oldmat->size; 1422 a->rank = oldmat->rank; 1423 mat->insertmode = NOT_SET_VALUES; 1424 a->nvec = oldmat->nvec; 1425 a->donotstash = oldmat->donotstash; 1426 1427 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1428 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1429 1430 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1431 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1432 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1433 1434 *newmat = mat; 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1440 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1441 { 1442 PetscErrorCode ierr; 1443 PetscMPIInt rank,size; 1444 PetscInt *rowners,i,m,nz,j; 1445 PetscScalar *array,*vals,*vals_ptr; 1446 1447 PetscFunctionBegin; 1448 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1449 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1450 1451 /* determine ownership of all rows */ 1452 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1453 else m = newmat->rmap->n; 1454 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1455 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1456 rowners[0] = 0; 1457 for (i=2; i<=size; i++) { 1458 rowners[i] += rowners[i-1]; 1459 } 1460 1461 if (!sizesset) { 1462 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1463 } 1464 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1465 ierr = 
MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
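
/*
   On-disk formats handled below: with the PETSC_VIEWER_NATIVE format, MatView_MPIDense_Binary()
   writes a header of four PetscInt entries, {MAT_FILE_CLASSID, M, N, MATRIX_BINARY_FORMAT_DENSE},
   followed by all matrix entries row by row. MatLoad_MPIDense() checks the fourth header entry:
   if it equals MATRIX_BINARY_FORMAT_DENSE the file is read with MatLoad_MPIDense_DenseInFile()
   above; otherwise the file is assumed to hold the standard PETSc sparse binary format (row
   lengths, then column indices, then values) and its entries are inserted into the dense matrix
   with MatSetValues().
*/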
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
1637 } 1638 1639 /* create our matrix */ 1640 for (i=0; i<m; i++) ourlens[i] -= offlens[i]; 1641 1642 if (!sizesset) { 1643 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1644 } 1645 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1646 for (i=0; i<m; i++) ourlens[i] += offlens[i]; 1647 1648 if (!rank) { 1649 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1650 1651 /* read in my part of the matrix numerical values */ 1652 nz = procsnz[0]; 1653 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1654 1655 /* insert into matrix */ 1656 jj = rstart; 1657 smycols = mycols; 1658 svals = vals; 1659 for (i=0; i<m; i++) { 1660 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1661 smycols += ourlens[i]; 1662 svals += ourlens[i]; 1663 jj++; 1664 } 1665 1666 /* read in other processors and ship out */ 1667 for (i=1; i<size; i++) { 1668 nz = procsnz[i]; 1669 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1670 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1671 } 1672 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1673 } else { 1674 /* receive numeric values */ 1675 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1676 1677 /* receive message of values*/ 1678 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1679 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1680 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1681 1682 /* insert into matrix */ 1683 jj = rstart; 1684 smycols = mycols; 1685 svals = vals; 1686 for (i=0; i<m; i++) { 1687 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1688 smycols += ourlens[i]; 1689 svals += ourlens[i]; 1690 jj++; 1691 } 1692 } 1693 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 1694 ierr = PetscFree(vals);CHKERRQ(ierr); 1695 ierr = PetscFree(mycols);CHKERRQ(ierr); 1696 ierr = PetscFree(rowners);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatEqual_MPIDense" 1705 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1706 { 1707 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1708 Mat a,b; 1709 PetscBool flg; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 a = matA->A; 1714 b = matB->A; 1715 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1716 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720
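
/*
   Illustrative sketch (not compiled as part of this file): typical access to the locally owned
   block of an assembled MATMPIDENSE matrix A using the routines defined above. Entries are
   stored by columns, so entry (i,j) of the local block sits at array[i + j*lda], where lda is
   the leading dimension of the local storage (by default the number of local rows).

      Mat         Aloc;
      PetscScalar *array;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);    .. sequential matrix of the local rows
      ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);         .. column-major array of the local block
      .. read or modify array[] ..
      ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
*/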