/*
   Basic functions for basic parallel dense matrices.

   The MPI dense format stores the locally owned rows (all N columns of them)
   in a single inner sequential dense matrix (mat->A); most operations below
   simply translate global indices to local ones and delegate to it.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>   /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Only the MPI format keeps a separate inner matrix; anything else
     (i.e. a sequential dense matrix) is its own local representation. */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
/* Returns one row of the matrix; only rows owned by this process may be requested.
   Delegates to the inner sequential matrix after shifting to local numbering. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;  /* global row -> local row of the inner matrix */
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
/* Releases the index/value arrays handed out by MatGetRow_MPIDense().
   NOTE(review): frees them with PetscFree, i.e. assumes the sequential
   MatGetRow allocated fresh arrays for this caller. */
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/* Returns the square on-diagonal block of this process as a sequential matrix.
   The block is created once, cached on A as the composed object "DiagonalBlock",
   and shares (does not copy) the storage of the local dense array, offset to
   column rstart — assumes the inner matrix is stored with lda == m; TODO confirm. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* point B's storage into the local array at column offset rstart */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    /* the compose above holds a reference; drop ours so A owns the cache */
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/* Inserts/adds values: locally owned rows go straight into the inner matrix;
   off-process rows are cached in the stash until assembly (unless donotstash). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;  /* negative rows are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: element (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;  /* stashed entries force a re-assembly */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
/* Retrieves values; only locally owned rows are supported (no communication). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
/* Direct access to the local dense array; thin wrapper over the inner matrix. */
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
/* Extracts a submatrix. The column IS is gathered onto every process; the row IS
   must select locally owned rows — the submatrix keeps the caller's partitioning. */
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    /* source column icol[i], honoring the source lda; destination is contiguous */
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
/* Companion to MatDenseGetArray_MPIDense(); returns the array to the inner matrix. */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
/* Begins assembly: verifies all ranks use a compatible InsertMode and starts
   scattering the stashed (off-process) values to their owners. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr =
MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  /* the bitwise OR only equals ADD|INSERT when the ranks disagree */
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/* Completes assembly: drains the stash messages (inserting the received values),
   assembles the inner sequential matrix, and on the first final assembly sets
   up the scatter used by the multiply routines. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
/* Zeros the matrix by zeroing the inner sequential matrix. */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/* Zeros the given global rows (and optionally sets the diagonal to 'diag' and
   fixes the right-hand side b = diag*x). Rows may be owned by other processes:
   the routine first performs an all-to-all rendezvous — each rank sends the row
   indices to their owners (MPI_Isend), receives the indices it owns
   (MPI_Irecv/MPI_Waitany), then zaps its local rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for rank j, nprocs[2*j+1] = 1 if any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  /* first pass: bucket the row indices by owner (starts[] is consumed) */
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* rebuild starts[] to launch one send per destination rank */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local row numbers */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    /* poke the diagonal entries directly into the column-major local array */
    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
/* y = A*x: gather the needed entries of x into lvec, then local dense multiply. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr =
VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
/* z = y + A*x: same scatter as MatMult, then local multiply-add. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
/* y = A^T*x: local transpose multiply into lvec, then add-scatter the partial
   sums back (reverse scatter) since every rank contributes to every entry of y. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
/* z = y + A^T*x: copy y into z first, then accumulate as in MatMultTranspose. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/* Copies the locally owned diagonal entries into v.
   Indexing assumes the local array is column-major with lda == m — TODO confirm. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;  /* offset to the first diagonal column of this rank */
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/* Destroys the parallel dense matrix: inner matrix, scatter machinery, stash,
   and all composed method pointers. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/* Writes the matrix to a binary viewer (PETSC_VIEWER_NATIVE only).
   Rank 0 writes the header and its own rows, then receives each other rank's
   rows in turn; the column-major local blocks are transposed into row-major
   order in 'work' before writing. */
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;  /* column-major -> row-major transpose */
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks just ship their whole local block to rank 0 */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
/* Views the matrix on ASCII/draw/socket viewers. Info formats are printed
   per rank; otherwise the whole matrix is assembled onto rank 0 (via a
   temporary matrix A) and viewed through the singleton viewer. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ...
This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    /* row-by-row copy of the local rows into the rank-0-sized temporary */
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
/* Dispatches MatView to the binary or ASCII/draw/socket implementation. */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
/* Gathers matrix statistics; MAT_GLOBAL_MAX/SUM reduce the local numbers
   across the communicator. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
/* Handles matrix options; most are forwarded to the inner matrix or ignored. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
/* A = diag(ll)*A*diag(rr). Left scaling uses the locally owned part of ll
   directly (stride-m walk across a row of the column-major array); right
   scaling first gathers rr into lvec since each rank holds all columns. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;  /* column i is contiguous */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
/* Computes Frobenius, one, or infinity norm; local contributions are combined
   with MPI_Allreduce. The two norm is not supported. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      /* local column sums of absolute values */
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr =
PetscFree2(tmp,tmp);CHKERRQ(ierr); 928 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 929 } else if (type == NORM_INFINITY) { /* max row norm */ 930 PetscReal ntemp; 931 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 932 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 933 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 934 } 935 PetscFunctionReturn(0); 936 } 937 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatTranspose_MPIDense" 940 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 941 { 942 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 943 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 944 Mat B; 945 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 946 PetscErrorCode ierr; 947 PetscInt j,i; 948 PetscScalar *v; 949 950 PetscFunctionBegin; 951 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 952 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 953 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 954 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 955 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 956 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 957 } else { 958 B = *matout; 959 } 960 961 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 962 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 963 for (i=0; i<m; i++) rwork[i] = rstart + i; 964 for (j=0; j<n; j++) { 965 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 966 v += m; 967 } 968 ierr = PetscFree(rwork);CHKERRQ(ierr); 969 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 970 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 972 *matout = B; 973 } else { 974 ierr = 
MatHeaderMerge(A,B);CHKERRQ(ierr); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 980 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 981 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatSetUp_MPIDense" 985 PetscErrorCode MatSetUp_MPIDense(Mat A) 986 { 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatAXPY_MPIDense" 996 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 997 { 998 PetscErrorCode ierr; 999 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1000 1001 PetscFunctionBegin; 1002 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatConjugate_MPIDense" 1008 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1009 { 1010 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatRealPart_MPIDense" 1020 PetscErrorCode MatRealPart_MPIDense(Mat A) 1021 { 1022 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1023 PetscErrorCode ierr; 1024 1025 PetscFunctionBegin; 1026 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1032 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1033 { 1034 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1035 PetscErrorCode ierr; 1036 1037 PetscFunctionBegin; 1038 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1045 
/*
   Column norms of the parallel dense matrix: each process computes the norms
   of its local row slice (which spans all N global columns), then the local
   results are combined across the communicator (MAX for infinity norm, SUM
   otherwise; two-norms are squared before summing and square-rooted after).
*/
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* square local 2-norms so they can be summed across processes */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
/* Fills every entry of the local dense block with a value from rctx. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/*
   Function table for MATMPIDENSE.  Slot positions correspond to the MatOps
   struct layout; the numeric comments are slot indices and must stay aligned
   with the struct definition -- do not reorder entries.
*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
/*
   Type-specific preallocation: sets up the layouts and creates the local
   SEQDENSE block of size (local rows) x (global columns), optionally wrapping
   user-provided column-major storage in 'data'.
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* local block is mat->rmap->n x mat->cmap->N: each process stores all columns of its rows */
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   Constructor registered for MATMPIDENSE: installs the function table, the
   stash used for off-process MatSetValues, and the composed type-specific
   methods (dense array access, diagonal block, preallocation, and the
   AIJ*Dense matmat products).
*/
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
.  A - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense when mat is MATMPIDENSE */
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* single-process communicators get the sequential type directly */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
/*
   Duplicates the parallel dense matrix: copies the header state, references
   the layouts, rebuilds the multiply scatter, and duplicates the local block
   (copying values per cpvalues).
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/*
   Loads a matrix that was stored on disk in dense (row-major) format.
   Rank 0 reads every process's row slice from fd and ships each slice with
   MPIULong_Send; receivers transpose row-major file data into the local
   column-major dense array.  'fd' is only meaningful on rank 0.
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows (default split if local size not set) */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];        /* prefix-sum into ownership ranges */
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    /* NOTE(review): vals is sized by rank 0's own m*N but is reused below for
       other processes' slices; this assumes no other process owns more rows
       than rank 0 (true for the default split above) -- verify for custom
       layouts */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;   /* row-major file -> column-major storage */
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
/*
   Loads a matrix stored in PETSc binary (sparse AIJ-style) format into a
   parallel dense matrix.  Rank 0 reads the header, row lengths, column
   indices and values, scattering/sending each process its portion; if the
   header marks the file as dense (MATRIX_BINARY_FORMAT_DENSE) the work is
   delegated to MatLoad_MPIDense_DenseInFile.
*/
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;                  /* binary file descriptor; only valid on rank 0 */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  /* sizesset == 0 means the caller left every dimension to be taken from the file */
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) ourlens[i] -= offlens[i];

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) ourlens[i] += offlens[i];     /* restore full row lengths */

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
/*
   Two parallel dense matrices are equal iff every process's local block is
   equal; local results are combined with a logical AND across the communicator.
*/
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}