1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) *B = mat->A; 34 else *B = A; 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRow_MPIDense" 40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 41 { 42 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 43 PetscErrorCode ierr; 44 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 45 46 PetscFunctionBegin; 47 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 48 lrow = row - rstart; 49 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatRestoreRow_MPIDense" 55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 56 { 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 61 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 62 PetscFunctionReturn(0); 63 } 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 68 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 69 { 70 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 71 PetscErrorCode ierr; 72 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 73 PetscScalar *array; 74 MPI_Comm comm; 75 Mat B; 76 77 PetscFunctionBegin; 78 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 79 80 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 81 if (!B) { 82 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 83 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 84 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 85 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 86 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 87 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 88 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 89 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 90 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 92 *a = B; 93 ierr = MatDestroy(&B);CHKERRQ(ierr); 94 } else *a = B; 95 PetscFunctionReturn(0); 96 } 97 EXTERN_C_END 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatSetValues_MPIDense" 101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 102 { 103 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 104 PetscErrorCode ierr; 105 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 106 PetscBool roworiented = 
A->roworiented; 107 108 PetscFunctionBegin; 109 if (v) PetscValidScalarPointer(v,6); 110 for (i=0; i<m; i++) { 111 if (idxm[i] < 0) continue; 112 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 113 if (idxm[i] >= rstart && idxm[i] < rend) { 114 row = idxm[i] - rstart; 115 if (roworiented) { 116 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 117 } else { 118 for (j=0; j<n; j++) { 119 if (idxn[j] < 0) continue; 120 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 121 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 122 } 123 } 124 } else if (!A->donotstash) { 125 mat->assembled = PETSC_FALSE; 126 if (roworiented) { 127 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 128 } else { 129 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 130 } 131 } 132 } 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatGetValues_MPIDense" 138 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 139 { 140 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 141 PetscErrorCode ierr; 142 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 143 144 PetscFunctionBegin; 145 for (i=0; i<m; i++) { 146 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 147 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 148 if (idxm[i] >= rstart && idxm[i] < rend) { 149 row = idxm[i] - rstart; 150 for (j=0; j<n; j++) { 151 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 152 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 153 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 154 } 155 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 156 } 157 PetscFunctionReturn(0); 158 } 159 160 #undef __FUNCT__ 161 #define __FUNCT__ "MatDenseGetArray_MPIDense" 162 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 163 { 164 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 165 PetscErrorCode ierr; 166 167 PetscFunctionBegin; 168 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 174 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 175 { 176 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 177 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 178 PetscErrorCode ierr; 179 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 180 const PetscInt *irow,*icol; 181 PetscScalar *av,*bv,*v = lmat->v; 182 Mat newmat; 183 IS iscol_local; 184 185 PetscFunctionBegin; 186 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 187 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 188 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 189 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 190 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 191 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 192 193 /* No parallel redistribution currently supported! Should really check each index set 194 to comfirm that it is OK. ... 
Currently supports only submatrix same partitioning as 195 original matrix! */ 196 197 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 198 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 199 200 /* Check submatrix call */ 201 if (scall == MAT_REUSE_MATRIX) { 202 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 203 /* Really need to test rows and column sizes! */ 204 newmat = *B; 205 } else { 206 /* Create and fill new matrix */ 207 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 208 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 209 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 210 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 211 } 212 213 /* Now extract the data pointers and do the copy, column at a time */ 214 newmatd = (Mat_MPIDense*)newmat->data; 215 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 216 217 for (i=0; i<Ncols; i++) { 218 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 219 for (j=0; j<nrows; j++) { 220 *bv++ = av[irow[j] - rstart]; 221 } 222 } 223 224 /* Assemble the matrices so that the correct flags are set */ 225 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 226 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 227 228 /* Free work space */ 229 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 230 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 231 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 232 *B = newmat; 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 238 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 239 { 240 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 241 PetscErrorCode ierr; 242 243 PetscFunctionBegin; 244 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 250 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 251 { 252 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 253 MPI_Comm comm; 254 PetscErrorCode ierr; 255 PetscInt nstash,reallocs; 256 InsertMode addv; 257 258 PetscFunctionBegin; 259 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 260 /* make sure all processors are either in INSERTMODE or ADDMODE */ 261 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 262 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 263 mat->insertmode = addv; /* in case this processor had no cache */ 264 265 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 266 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 267 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 273 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 274 { 275 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,*row,*col,flg,j,rstart,ncols; 278 PetscMPIInt n; 279 PetscScalar *val; 280 InsertMode addv=mat->insertmode; 281 282 PetscFunctionBegin; 283 /* wait on receives */ 284 while (1) { 285 ierr = 
MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 286 if (!flg) break; 287 288 for (i=0; i<n;) { 289 /* Now identify the consecutive vals belonging to the same row */ 290 for (j=i,rstart=row[j]; j<n; j++) { 291 if (row[j] != rstart) break; 292 } 293 if (j < n) ncols = j-i; 294 else ncols = n-i; 295 /* Now assemble all these values with a single function call */ 296 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 297 i = j; 298 } 299 } 300 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 301 302 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 303 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 304 305 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 306 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 307 } 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "MatZeroEntries_MPIDense" 313 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 314 { 315 PetscErrorCode ierr; 316 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 317 318 PetscFunctionBegin; 319 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 /* the code does not do the diagonal entries correctly unless the 324 matrix is square and the column and row owerships are identical. 325 This is a BUG. The only way to fix it seems to be to access 326 mdn->A and mdn->B directly and not through the MatZeroRows() 327 routine. 328 */ 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatZeroRows_MPIDense" 331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 332 { 333 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 334 PetscErrorCode ierr; 335 PetscInt i,*owners = A->rmap->range; 336 PetscInt *nprocs,j,idx,nsends; 337 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 338 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 339 PetscInt *lens,*lrows,*values; 340 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 341 MPI_Comm comm; 342 MPI_Request *send_waits,*recv_waits; 343 MPI_Status recv_status,*send_status; 344 PetscBool found; 345 const PetscScalar *xx; 346 PetscScalar *bb; 347 348 PetscFunctionBegin; 349 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 350 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 351 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 352 /* first count number of contributors to each processor */ 353 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 354 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 355 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 356 for (i=0; i<N; i++) { 357 idx = rows[i]; 358 found = PETSC_FALSE; 359 for (j=0; j<size; j++) { 360 if (idx >= owners[j] && idx < owners[j+1]) { 361 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 362 } 363 } 364 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 365 } 366 nsends = 0; 367 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 368 369 /* inform other processors of number of messages and max length*/ 370 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 371 372 /* post receives: */ 373 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 374 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 375 for (i=0; i<nrecvs; i++) { 376 ierr = 
MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 377 } 378 379 /* do sends: 380 1) starts[i] gives the starting index in svalues for stuff going to 381 the ith processor 382 */ 383 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 384 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 385 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 386 387 starts[0] = 0; 388 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 389 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 390 391 starts[0] = 0; 392 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 393 count = 0; 394 for (i=0; i<size; i++) { 395 if (nprocs[2*i+1]) { 396 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 397 } 398 } 399 ierr = PetscFree(starts);CHKERRQ(ierr); 400 401 base = owners[rank]; 402 403 /* wait on receives */ 404 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 405 count = nrecvs; 406 slen = 0; 407 while (count) { 408 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 409 /* unpack receives into our local space */ 410 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 411 412 source[imdex] = recv_status.MPI_SOURCE; 413 lens[imdex] = n; 414 slen += n; 415 count--; 416 } 417 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 418 419 /* move the data into the send scatter */ 420 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 421 count = 0; 422 for (i=0; i<nrecvs; i++) { 423 values = rvalues + i*nmax; 424 for (j=0; j<lens[i]; j++) { 425 lrows[count++] = values[j] - base; 426 } 427 } 428 ierr = PetscFree(rvalues);CHKERRQ(ierr); 429 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 430 ierr = PetscFree(owner);CHKERRQ(ierr); 431 ierr = PetscFree(nprocs);CHKERRQ(ierr); 432 433 /* fix right hand side if needed */ 434 if (x && b) { 435 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 436 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 437 for (i=0; i<slen; i++) { 438 bb[lrows[i]] = diag*xx[lrows[i]]; 439 } 440 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 441 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 442 } 443 444 /* actually zap the local rows */ 445 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 446 if (diag != 0.0) { 447 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 448 PetscInt m = ll->lda, i; 449 450 for (i=0; i<slen; i++) { 451 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 452 } 453 } 454 ierr = PetscFree(lrows);CHKERRQ(ierr); 455 456 /* wait on sends */ 457 if (nsends) { 458 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 459 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 460 ierr = PetscFree(send_status);CHKERRQ(ierr); 461 } 462 ierr = PetscFree(send_waits);CHKERRQ(ierr); 463 ierr = PetscFree(svalues);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatMult_MPIDense" 469 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 470 { 471 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 472 PetscErrorCode ierr; 473 474 PetscFunctionBegin; 475 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 
#define __FUNCT__ "MatMultAdd_MPIDense" 483 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 484 { 485 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 490 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatMultTranspose_MPIDense" 497 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 498 { 499 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 500 PetscErrorCode ierr; 501 PetscScalar zero = 0.0; 502 503 PetscFunctionBegin; 504 ierr = VecSet(yy,zero);CHKERRQ(ierr); 505 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 506 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 507 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 513 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 514 { 515 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 516 PetscErrorCode ierr; 517 518 PetscFunctionBegin; 519 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 520 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 522 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatGetDiagonal_MPIDense" 528 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 529 { 530 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 531 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 532 PetscErrorCode ierr; 533 PetscInt len,i,n,m = A->rmap->n,radd; 534 PetscScalar *x,zero = 0.0; 535 536 PetscFunctionBegin; 537 ierr = VecSet(v,zero);CHKERRQ(ierr); 538 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 539 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 540 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 541 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 542 radd = A->rmap->rstart*m; 543 for (i=0; i<len; i++) { 544 x[i] = aloc->v[radd + i*m + i]; 545 } 546 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatDestroy_MPIDense" 552 PetscErrorCode MatDestroy_MPIDense(Mat mat) 553 { 554 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 555 PetscErrorCode ierr; 556 557 PetscFunctionBegin; 558 #if defined(PETSC_USE_LOG) 559 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 560 #endif 561 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 562 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 563 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 564 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 565 566 ierr = PetscFree(mat->data);CHKERRQ(ierr); 567 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr); 571 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatView_MPIDense_Binary" 578 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 579 { 580 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 581 PetscErrorCode ierr; 582 PetscViewerFormat format; 583 int fd; 584 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 585 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 586 PetscScalar *work,*v,*vv; 587 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 588 589 PetscFunctionBegin; 590 if (mdn->size == 1) { 591 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 592 } else { 593 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 594 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 595 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 596 597 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 598 if (format == PETSC_VIEWER_NATIVE) { 599 600 if (!rank) { 601 /* store the matrix as a dense matrix */ 602 header[0] = MAT_FILE_CLASSID; 603 header[1] = mat->rmap->N; 604 header[2] = N; 605 header[3] = MATRIX_BINARY_FORMAT_DENSE; 606 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 607 608 /* get largest work array needed for transposing array */ 609 mmax = mat->rmap->n; 610 for (i=1; i<size; i++) { 611 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 612 } 613 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 614 615 /* write out local array, by rows */ 616 m = mat->rmap->n; 617 v = a->v; 618 for (j=0; j<N; j++) { 619 for (i=0; i<m; i++) { 620 work[j + i*N] = *v++; 621 } 622 } 623 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 624 /* get largest work array to receive messages from other processes, excludes process zero */ 625 mmax = 0; 626 for (i=1; i<size; i++) { 627 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 628 } 629 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 630 for (k = 1; k < size; k++) { 631 v = vv; 632 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 633 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 634 635 for (j = 0; j < N; j++) { 636 for (i = 0; i < m; i++) { 637 work[j + i*N] = *v++; 638 } 639 } 640 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 641 } 642 ierr = PetscFree(work);CHKERRQ(ierr); 643 ierr = PetscFree(vv);CHKERRQ(ierr); 644 } else { 645 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 646 } 647 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 648 } 649 PetscFunctionReturn(0); 650 } 651 652 #include <petscdraw.h> 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 655 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 656 { 657 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 658 PetscErrorCode ierr; 659 PetscMPIInt size = mdn->size,rank = mdn->rank; 660 PetscViewerType vtype; 661 PetscBool iascii,isdraw; 662 PetscViewer sviewer; 663 PetscViewerFormat format; 664 665 PetscFunctionBegin; 666 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 667 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 668 if (iascii) { 669 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 670 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 671 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 672 MatInfo info; 673 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 674 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 675 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 676 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 677 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } else if (format == PETSC_VIEWER_ASCII_INFO) { 681 PetscFunctionReturn(0); 682 } 683 } else if (isdraw) { 684 PetscDraw draw; 685 PetscBool isnull; 686 687 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 688 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 689 if (isnull) PetscFunctionReturn(0); 690 } 691 692 if (size == 1) { 693 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 694 } else { 695 /* assemble the entire matrix onto first processor. */ 696 Mat A; 697 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 698 PetscInt *cols; 699 PetscScalar *vals; 700 701 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 702 if (!rank) { 703 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 704 } else { 705 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 706 } 707 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 708 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 709 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 710 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 711 712 /* Copy the matrix ... 
This isn't the most efficient means, 713 but it's quick for now */ 714 A->insertmode = INSERT_VALUES; 715 716 row = mat->rmap->rstart; 717 m = mdn->A->rmap->n; 718 for (i=0; i<m; i++) { 719 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 720 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 721 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 722 row++; 723 } 724 725 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 726 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 727 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 728 if (!rank) { 729 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 730 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 731 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 732 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 733 } 734 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 735 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 736 ierr = MatDestroy(&A);CHKERRQ(ierr); 737 } 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatView_MPIDense" 743 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 744 { 745 PetscErrorCode ierr; 746 PetscBool iascii,isbinary,isdraw,issocket; 747 748 PetscFunctionBegin; 749 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 752 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 753 754 if (iascii || issocket || isdraw) { 755 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 756 } else if (isbinary) { 757 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 758 } 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatGetInfo_MPIDense" 764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 765 { 766 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 767 Mat mdn = mat->A; 768 PetscErrorCode ierr; 769 PetscReal isend[5],irecv[5]; 770 771 PetscFunctionBegin; 772 info->block_size = 1.0; 773 774 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 775 776 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 777 isend[3] = info->memory; isend[4] = info->mallocs; 778 if (flag == MAT_LOCAL) { 779 info->nz_used = isend[0]; 780 info->nz_allocated = isend[1]; 781 info->nz_unneeded = isend[2]; 782 info->memory = isend[3]; 783 info->mallocs = isend[4]; 784 } else if (flag == MAT_GLOBAL_MAX) { 785 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 786 787 info->nz_used = irecv[0]; 788 info->nz_allocated = irecv[1]; 789 info->nz_unneeded = irecv[2]; 790 info->memory = irecv[3]; 791 info->mallocs = irecv[4]; 792 } else if (flag == MAT_GLOBAL_SUM) { 793 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 794 795 info->nz_used = irecv[0]; 796 info->nz_allocated = irecv[1]; 797 info->nz_unneeded = irecv[2]; 798 info->memory = irecv[3]; 799 info->mallocs = irecv[4]; 800 } 801 info->fill_ratio_given = 0; /* no 
parallel LU/ILU/Cholesky */ 802 info->fill_ratio_needed = 0; 803 info->factor_mallocs = 0; 804 PetscFunctionReturn(0); 805 } 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatSetOption_MPIDense" 809 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 810 { 811 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 812 PetscErrorCode ierr; 813 814 PetscFunctionBegin; 815 switch (op) { 816 case MAT_NEW_NONZERO_LOCATIONS: 817 case MAT_NEW_NONZERO_LOCATION_ERR: 818 case MAT_NEW_NONZERO_ALLOCATION_ERR: 819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 820 break; 821 case MAT_ROW_ORIENTED: 822 a->roworiented = flg; 823 824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 825 break; 826 case MAT_NEW_DIAGONALS: 827 case MAT_KEEP_NONZERO_PATTERN: 828 case MAT_USE_HASH_TABLE: 829 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 830 break; 831 case MAT_IGNORE_OFF_PROC_ENTRIES: 832 a->donotstash = flg; 833 break; 834 case MAT_SYMMETRIC: 835 case MAT_STRUCTURALLY_SYMMETRIC: 836 case MAT_HERMITIAN: 837 case MAT_SYMMETRY_ETERNAL: 838 case MAT_IGNORE_LOWER_TRIANGULAR: 839 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 840 break; 841 default: 842 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 843 } 844 PetscFunctionReturn(0); 845 } 846 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatDiagonalScale_MPIDense" 850 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 851 { 852 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 853 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 854 PetscScalar *l,*r,x,*v; 855 PetscErrorCode ierr; 856 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 857 858 PetscFunctionBegin; 859 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 860 if (ll) { 861 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 862 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 863 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 864 for (i=0; i<m; i++) { 865 x = l[i]; 866 v = mat->v + i; 867 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 868 } 869 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 870 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 871 } 872 if (rr) { 873 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 874 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 875 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 878 for (i=0; i<n; i++) { 879 x = r[i]; 880 v = mat->v + i*m; 881 for (j=0; j<m; j++) (*v++) *= x; 882 } 883 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 884 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatNorm_MPIDense" 891 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 892 { 893 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 894 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 895 PetscErrorCode ierr; 896 PetscInt i,j; 897 PetscReal sum = 0.0; 898 PetscScalar *v = mat->v; 899 900 PetscFunctionBegin; 901 if (mdn->size == 1) { 902 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 903 } else { 904 if (type == NORM_FROBENIUS) { 905 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 906 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 907 } 908 ierr = 
MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); /* free both arrays obtained with PetscMalloc2() */
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B
= (Mat_MPIDense*)X->data; 1000 1001 PetscFunctionBegin; 1002 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatConjugate_MPIDense" 1008 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1009 { 1010 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatRealPart_MPIDense" 1020 PetscErrorCode MatRealPart_MPIDense(Mat A) 1021 { 1022 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1023 PetscErrorCode ierr; 1024 1025 PetscFunctionBegin; 1026 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1032 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1033 { 1034 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1035 PetscErrorCode ierr; 1036 1037 PetscFunctionBegin; 1038 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1045 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1046 { 1047 PetscErrorCode ierr; 1048 PetscInt i,n; 1049 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1050 PetscReal *work; 1051 1052 PetscFunctionBegin; 1053 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1054 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1055 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1056 if (type == NORM_2) { 1057 for (i=0; i<n; i++) work[i] *= work[i]; 1058 } 1059 if (type == NORM_INFINITY) { 1060 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1061 } else { 1062 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(work);CHKERRQ(ierr); 1065 if (type == NORM_2) { 1066 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatSetRandom_MPIDense" 1073 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1074 { 1075 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1076 PetscErrorCode ierr; 1077 PetscScalar *a; 1078 PetscInt m,n,i; 1079 1080 PetscFunctionBegin; 1081 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1082 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1083 for (i=0; i<m*n; i++) { 1084 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1085 } 1086 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 /* -------------------------------------------------------------------*/ 1091 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1092 MatGetRow_MPIDense, 1093 MatRestoreRow_MPIDense, 1094 MatMult_MPIDense, 1095 /* 4*/ MatMultAdd_MPIDense, 1096 MatMultTranspose_MPIDense, 1097 MatMultTransposeAdd_MPIDense, 1098 0, 1099 0, 1100 0, 1101 /* 10*/ 0, 1102 0, 1103 0, 1104 0, 1105 MatTranspose_MPIDense, 1106 /* 15*/ MatGetInfo_MPIDense, 1107 MatEqual_MPIDense, 1108 MatGetDiagonal_MPIDense, 1109 MatDiagonalScale_MPIDense, 1110 MatNorm_MPIDense, 1111 /* 20*/ MatAssemblyBegin_MPIDense, 1112 MatAssemblyEnd_MPIDense, 1113 MatSetOption_MPIDense, 1114 MatZeroEntries_MPIDense, 1115 /* 24*/ MatZeroRows_MPIDense, 1116 0, 1117 0, 1118 0, 1119 0, 1120 /* 29*/ MatSetUp_MPIDense, 1121 0, 
1122 0, 1123 0, 1124 0, 1125 /* 34*/ MatDuplicate_MPIDense, 1126 0, 1127 0, 1128 0, 1129 0, 1130 /* 39*/ MatAXPY_MPIDense, 1131 MatGetSubMatrices_MPIDense, 1132 0, 1133 MatGetValues_MPIDense, 1134 0, 1135 /* 44*/ 0, 1136 MatScale_MPIDense, 1137 0, 1138 0, 1139 0, 1140 /* 49*/ MatSetRandom_MPIDense, 1141 0, 1142 0, 1143 0, 1144 0, 1145 /* 54*/ 0, 1146 0, 1147 0, 1148 0, 1149 0, 1150 /* 59*/ MatGetSubMatrix_MPIDense, 1151 MatDestroy_MPIDense, 1152 MatView_MPIDense, 1153 0, 1154 0, 1155 /* 64*/ 0, 1156 0, 1157 0, 1158 0, 1159 0, 1160 /* 69*/ 0, 1161 0, 1162 0, 1163 0, 1164 0, 1165 /* 74*/ 0, 1166 0, 1167 0, 1168 0, 1169 0, 1170 /* 79*/ 0, 1171 0, 1172 0, 1173 0, 1174 /* 83*/ MatLoad_MPIDense, 1175 0, 1176 0, 1177 0, 1178 0, 1179 0, 1180 /* 89*/ 1181 0, 1182 0, 1183 0, 1184 0, 1185 0, 1186 /* 94*/ 0, 1187 0, 1188 0, 1189 0, 1190 0, 1191 /* 99*/ 0, 1192 0, 1193 0, 1194 MatConjugate_MPIDense, 1195 0, 1196 /*104*/ 0, 1197 MatRealPart_MPIDense, 1198 MatImaginaryPart_MPIDense, 1199 0, 1200 0, 1201 /*109*/ 0, 1202 0, 1203 0, 1204 0, 1205 0, 1206 /*114*/ 0, 1207 0, 1208 0, 1209 0, 1210 0, 1211 /*119*/ 0, 1212 0, 1213 0, 1214 0, 1215 0, 1216 /*124*/ 0, 1217 MatGetColumnNorms_MPIDense 1218 }; 1219 1220 EXTERN_C_BEGIN 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1223 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1224 { 1225 Mat_MPIDense *a; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 mat->preallocated = PETSC_TRUE; 1230 /* Note: For now, when data is specified above, this assumes the user correctly 1231 allocates the local dense storage space. We should add error checking. */ 1232 1233 a = (Mat_MPIDense*)mat->data; 1234 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1235 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1236 a->nvec = mat->cmap->n; 1237 1238 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1239 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1240 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1241 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1242 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 EXTERN_C_END 1246 1247 EXTERN_C_BEGIN 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatCreate_MPIDense" 1250 PetscErrorCode MatCreate_MPIDense(Mat mat) 1251 { 1252 Mat_MPIDense *a; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1257 mat->data = (void*)a; 1258 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1259 1260 mat->insertmode = NOT_SET_VALUES; 1261 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1262 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1263 1264 /* build cache for off array entries formed */ 1265 a->donotstash = PETSC_FALSE; 1266 1267 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1268 1269 /* stuff used for matrix vector multiply */ 1270 a->lvec = 0; 1271 a->Mvctx = 0; 1272 a->roworiented = PETSC_TRUE; 1273 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1276 1277 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIDense",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C","MatMPIDenseSetPreallocation_MPIDense",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","MatMatMult_MPIAIJ_MPIDense",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","MatMatMultSymbolic_MPIAIJ_MPIDense",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","MatMatMultNumeric_MPIAIJ_MPIDense",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1282 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 EXTERN_C_END 1286 1287 /*MC 1288 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1289 1290 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1291 and MATMPIDENSE otherwise. 1292 1293 Options Database Keys: 1294 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1295 1296 Level: beginner 1297 1298 1299 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1300 M*/ 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1304 /*@C 1305 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1306 1307 Not collective 1308 1309 Input Parameters: 1310 . A - the matrix 1311 - data - optional location of matrix data. Set data=NULL for PETSc 1312 to control all matrix memory allocation. 1313 1314 Notes: 1315 The dense format is fully compatible with standard Fortran 77 1316 storage by columns. 1317 1318 The data input variable is intended primarily for Fortran programmers 1319 who wish to allocate their own matrix memory space. Most users should 1320 set data=NULL. 1321 1322 Level: intermediate 1323 1324 .keywords: matrix,dense, parallel 1325 1326 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1327 @*/ 1328 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1329 { 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatCreateDense" 1339 /*@C 1340 MatCreateDense - Creates a parallel matrix in dense format. 1341 1342 Collective on MPI_Comm 1343 1344 Input Parameters: 1345 + comm - MPI communicator 1346 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1347 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1348 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1349 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1350 - data - optional location of matrix data. Set data=NULL (NULL_SCALAR for Fortran users) for PETSc 1351 to control all matrix memory allocation. 1352 1353 Output Parameter: 1354 . A - the matrix 1355 1356 Notes: 1357 The dense format is fully compatible with standard Fortran 77 1358 storage by columns. 1359 1360 The data input variable is intended primarily for Fortran programmers 1361 who wish to allocate their own matrix memory space. 
Most users should
   set data=NULL (NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
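/*
   Illustrative usage sketch, not part of the library source: a minimal program that
   builds a parallel dense matrix with MatCreateDense(), fills its locally owned rows
   with MatSetValues(), assembles, and applies the matrix to a vector (exercising
   MatMult_MPIDense() above).  The routines called are the public PETSc interfaces
   used elsewhere in this file; the sizes and entry values are invented for the
   example.  Kept inside #if 0 so it does not affect compilation of this file.
*/
#if 0
#include <petscmat.h>

int main(int argc,char **argv)
{
  Mat            A;
  Vec            x,y;
  PetscInt       i,j,rstart,rend,N = 10;
  PetscScalar    v;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  /* let PETSc choose the local sizes and manage the storage (data = NULL) */
  ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = 1.0/(i+j+1);          /* Hilbert-like entry, purely illustrative */
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr); /* x matches the column layout, y the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);      /* y = A*x */

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
#endif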
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz ==
MATRIX_BINARY_FORMAT_DENSE) { 1544 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 /* determine ownership of all rows */ 1549 if (newmat->rmap->n < 0) { 1550 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1551 } else { 1552 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1553 } 1554 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1555 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1556 rowners[0] = 0; 1557 for (i=2; i<=size; i++) { 1558 rowners[i] += rowners[i-1]; 1559 } 1560 rstart = rowners[rank]; 1561 rend = rowners[rank+1]; 1562 1563 /* distribute row lengths to all processors */ 1564 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1565 if (!rank) { 1566 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1567 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1568 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1569 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1570 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1571 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1572 } else { 1573 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1574 } 1575 1576 if (!rank) { 1577 /* calculate the number of nonzeros on each processor */ 1578 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1579 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1580 for (i=0; i<size; i++) { 1581 for (j=rowners[i]; j< rowners[i+1]; j++) { 1582 procsnz[i] += rowlengths[j]; 1583 } 1584 } 1585 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1586 1587 /* determine max buffer needed and allocate it */ 1588 maxnz = 0; 1589 for (i=0; i<size; i++) { 1590 maxnz = PetscMax(maxnz,procsnz[i]); 1591 } 1592 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1593 1594 /* read in my part of the matrix column indices */ 1595 nz = procsnz[0]; 1596 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1597 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1598 1599 /* read in every one elses and ship off */ 1600 for (i=1; i<size; i++) { 1601 nz = procsnz[i]; 1602 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1603 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1604 } 1605 ierr = PetscFree(cols);CHKERRQ(ierr); 1606 } else { 1607 /* determine buffer space needed for message */ 1608 nz = 0; 1609 for (i=0; i<m; i++) { 1610 nz += ourlens[i]; 1611 } 1612 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1613 1614 /* receive message of column indices*/ 1615 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1616 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1617 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1618 } 1619 1620 /* loop over local rows, determining number of off diagonal entries */ 1621 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1622 jj = 0; 1623 for (i=0; i<m; i++) { 1624 for (j=0; j<ourlens[i]; j++) { 1625 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1626 jj++; 1627 } 1628 } 1629 1630 /* create our matrix */ 1631 for (i=0; i<m; i++) ourlens[i] -= offlens[i]; 1632 1633 if (!sizesset) { 1634 ierr = 
MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1635 } 1636 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1637 for (i=0; i<m; i++) ourlens[i] += offlens[i]; 1638 1639 if (!rank) { 1640 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1641 1642 /* read in my part of the matrix numerical values */ 1643 nz = procsnz[0]; 1644 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1645 1646 /* insert into matrix */ 1647 jj = rstart; 1648 smycols = mycols; 1649 svals = vals; 1650 for (i=0; i<m; i++) { 1651 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1652 smycols += ourlens[i]; 1653 svals += ourlens[i]; 1654 jj++; 1655 } 1656 1657 /* read in other processors and ship out */ 1658 for (i=1; i<size; i++) { 1659 nz = procsnz[i]; 1660 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1661 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1662 } 1663 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1664 } else { 1665 /* receive numeric values */ 1666 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1667 1668 /* receive message of values*/ 1669 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1670 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1671 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1672 1673 /* insert into matrix */ 1674 jj = rstart; 1675 smycols = mycols; 1676 svals = vals; 1677 for (i=0; i<m; i++) { 1678 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1679 smycols += ourlens[i]; 1680 svals += ourlens[i]; 1681 jj++; 1682 } 1683 } 1684 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 1685 ierr = PetscFree(vals);CHKERRQ(ierr); 1686 ierr = PetscFree(mycols);CHKERRQ(ierr); 1687 ierr = PetscFree(rowners);CHKERRQ(ierr); 1688 1689 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatEqual_MPIDense" 1696 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1697 { 1698 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1699 Mat a,b; 1700 PetscBool flg; 1701 PetscErrorCode ierr; 1702 1703 PetscFunctionBegin; 1704 a = matA->A; 1705 b = matB->A; 1706 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1707 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711
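/*
   Illustrative sketch, not part of the library source: filling a MATMPIDENSE matrix by
   writing directly into its local dense storage instead of calling MatSetValues(),
   using MatDenseGetArray()/MatDenseRestoreArray() as implemented above.  It assumes the
   local block is stored column major with leading dimension equal to the number of
   local rows, which is how MatMPIDenseSetPreallocation() allocates it; FillLocalBlock
   is a made-up helper name and the entry values are invented.  Kept inside #if 0 so it
   does not affect compilation of this file.
*/
#if 0
#include <petscmat.h>

static PetscErrorCode FillLocalBlock(Mat A)
{
  PetscScalar    *a;
  PetscInt       i,j,m,N,rstart,rend;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);   /* number of locally owned rows */
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);        /* each process stores all N columns of its rows */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatDenseGetArray(A,&a);CHKERRQ(ierr);
  for (j=0; j<N; j++) {
    for (i=0; i<m; i++) {
      a[i + j*m] = (PetscScalar)(rstart + i) - (PetscScalar)j; /* global entry (rstart+i, j) */
    }
  }
  ierr = MatDenseRestoreArray(A,&a);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif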