1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) { 34 *B = mat->A; 35 } else { 36 *B = A; 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatGetRow_MPIDense" 43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 48 49 PetscFunctionBegin; 50 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 51 lrow = row - rstart; 52 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatRestoreRow_MPIDense" 58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 59 { 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 64 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 71 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 72 { 73 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 74 PetscErrorCode ierr; 75 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 76 PetscScalar *array; 77 MPI_Comm comm; 78 Mat B; 79 80 PetscFunctionBegin; 81 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 82 83 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 84 if (!B) { 85 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 86 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 87 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 88 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 89 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 90 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 91 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 93 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 94 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 95 *a = B; 96 ierr = MatDestroy(&B);CHKERRQ(ierr); 97 } else { 98 *a = B; 99 } 100 PetscFunctionReturn(0); 101 } 102 EXTERN_C_END 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatSetValues_MPIDense" 106 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 107 { 108 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 109 PetscErrorCode ierr; 110 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 111 
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

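/*
   Usage sketch (illustrative only, not compiled here): an application can touch the
   raw local storage of a MATMPIDENSE matrix through MatDenseGetArray(), which simply
   forwards to the inner sequential matrix above.  The array holds the locally owned
   rows and all global columns in column-major order; the leading dimension below is
   assumed to equal the local row count, which holds for PETSc-allocated storage.

      PetscScalar *v;
      PetscInt    mloc,N,i,j;

      ierr = MatGetLocalSize(A,&mloc,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
      ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
      for (j=0; j<N; j++) {
        for (i=0; i<mloc; i++) v[i + j*mloc] = 0.0;
      }
      ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
*/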
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only a submatrix with the same partitioning as the
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

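/*
   Assembly sketch (illustrative only): entries set for rows owned by another process
   are cached in mat->stash by MatSetValues_MPIDense() and are only communicated by the
   MatAssemblyBegin/End pair implemented here, so every process must make both calls
   even if it set no off-process entries.  The indices and value below are placeholders.

      ierr = MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/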
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff
going to 383 the ith processor 384 */ 385 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 386 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 387 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 388 starts[0] = 0; 389 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 390 for (i=0; i<N; i++) { 391 svalues[starts[owner[i]]++] = rows[i]; 392 } 393 394 starts[0] = 0; 395 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 396 count = 0; 397 for (i=0; i<size; i++) { 398 if (nprocs[2*i+1]) { 399 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 400 } 401 } 402 ierr = PetscFree(starts);CHKERRQ(ierr); 403 404 base = owners[rank]; 405 406 /* wait on receives */ 407 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 408 count = nrecvs; 409 slen = 0; 410 while (count) { 411 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 412 /* unpack receives into our local space */ 413 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 414 source[imdex] = recv_status.MPI_SOURCE; 415 lens[imdex] = n; 416 slen += n; 417 count--; 418 } 419 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 420 421 /* move the data into the send scatter */ 422 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 423 count = 0; 424 for (i=0; i<nrecvs; i++) { 425 values = rvalues + i*nmax; 426 for (j=0; j<lens[i]; j++) { 427 lrows[count++] = values[j] - base; 428 } 429 } 430 ierr = PetscFree(rvalues);CHKERRQ(ierr); 431 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 432 ierr = PetscFree(owner);CHKERRQ(ierr); 433 ierr = PetscFree(nprocs);CHKERRQ(ierr); 434 435 /* fix right hand side if needed */ 436 if (x && b) { 437 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 438 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 439 for (i=0; i<slen; i++) { 440 bb[lrows[i]] = diag*xx[lrows[i]]; 441 } 442 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 443 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 444 } 445 446 /* actually zap the local rows */ 447 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 448 if (diag != 0.0) { 449 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 450 PetscInt m = ll->lda, i; 451 452 for (i=0; i<slen; i++) { 453 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 454 } 455 } 456 ierr = PetscFree(lrows);CHKERRQ(ierr); 457 458 /* wait on sends */ 459 if (nsends) { 460 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 461 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 462 ierr = PetscFree(send_status);CHKERRQ(ierr); 463 } 464 ierr = PetscFree(send_waits);CHKERRQ(ierr); 465 ierr = PetscFree(svalues);CHKERRQ(ierr); 466 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMult_MPIDense" 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultAdd_MPIDense" 486 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 487 { 488 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 489 
PetscErrorCode ierr; 490 491 PetscFunctionBegin; 492 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 494 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatMultTranspose_MPIDense" 500 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 501 { 502 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 503 PetscErrorCode ierr; 504 PetscScalar zero = 0.0; 505 506 PetscFunctionBegin; 507 ierr = VecSet(yy,zero);CHKERRQ(ierr); 508 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 509 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 #undef __FUNCT__ 515 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 516 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 517 { 518 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 519 PetscErrorCode ierr; 520 521 PetscFunctionBegin; 522 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 523 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 524 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "MatGetDiagonal_MPIDense" 531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 532 { 533 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 534 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 535 PetscErrorCode ierr; 536 PetscInt len,i,n,m = A->rmap->n,radd; 537 PetscScalar *x,zero = 0.0; 538 539 PetscFunctionBegin; 540 ierr = VecSet(v,zero);CHKERRQ(ierr); 541 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 542 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 543 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 544 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 545 radd = A->rmap->rstart*m; 546 for (i=0; i<len; i++) { 547 x[i] = aloc->v[radd + i*m + i]; 548 } 549 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatDestroy_MPIDense" 555 PetscErrorCode MatDestroy_MPIDense(Mat mat) 556 { 557 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 558 PetscErrorCode ierr; 559 560 PetscFunctionBegin; 561 562 #if defined(PETSC_USE_LOG) 563 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 564 #endif 565 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 566 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 567 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 568 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 569 570 ierr = PetscFree(mat->data);CHKERRQ(ierr); 571 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = 
    PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

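/*
   Usage sketch (illustrative only): storing a parallel dense matrix with a binary
   viewer requires the native format, as enforced by the error check above.  The
   file name is a placeholder.

      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/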
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ...
This isn't the most efficient means, 716 but it's quick for now */ 717 A->insertmode = INSERT_VALUES; 718 row = mat->rmap->rstart; m = mdn->A->rmap->n; 719 for (i=0; i<m; i++) { 720 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 721 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 722 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 723 row++; 724 } 725 726 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 727 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 728 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 729 if (!rank) { 730 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 731 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 732 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 733 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 734 } 735 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 736 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 737 ierr = MatDestroy(&A);CHKERRQ(ierr); 738 } 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatView_MPIDense" 744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 745 { 746 PetscErrorCode ierr; 747 PetscBool iascii,isbinary,isdraw,issocket; 748 749 PetscFunctionBegin; 750 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 752 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 753 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 754 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 755 756 if (iascii || issocket || isdraw) { 757 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 758 } else if (isbinary) { 759 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 760 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatGetInfo_MPIDense" 766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 767 { 768 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 769 Mat mdn = mat->A; 770 PetscErrorCode ierr; 771 PetscReal isend[5],irecv[5]; 772 773 PetscFunctionBegin; 774 info->block_size = 1.0; 775 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 776 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 777 isend[3] = info->memory; isend[4] = info->mallocs; 778 if (flag == MAT_LOCAL) { 779 info->nz_used = isend[0]; 780 info->nz_allocated = isend[1]; 781 info->nz_unneeded = isend[2]; 782 info->memory = isend[3]; 783 info->mallocs = isend[4]; 784 } else if (flag == MAT_GLOBAL_MAX) { 785 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 786 info->nz_used = irecv[0]; 787 info->nz_allocated = irecv[1]; 788 info->nz_unneeded = irecv[2]; 789 info->memory = irecv[3]; 790 info->mallocs = irecv[4]; 791 } else if (flag == MAT_GLOBAL_SUM) { 792 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 793 info->nz_used = irecv[0]; 794 info->nz_allocated = irecv[1]; 795 info->nz_unneeded = irecv[2]; 796 
info->memory = irecv[3]; 797 info->mallocs = irecv[4]; 798 } 799 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 800 info->fill_ratio_needed = 0; 801 info->factor_mallocs = 0; 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatSetOption_MPIDense" 807 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 808 { 809 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 810 PetscErrorCode ierr; 811 812 PetscFunctionBegin; 813 switch (op) { 814 case MAT_NEW_NONZERO_LOCATIONS: 815 case MAT_NEW_NONZERO_LOCATION_ERR: 816 case MAT_NEW_NONZERO_ALLOCATION_ERR: 817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 818 break; 819 case MAT_ROW_ORIENTED: 820 a->roworiented = flg; 821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 822 break; 823 case MAT_NEW_DIAGONALS: 824 case MAT_KEEP_NONZERO_PATTERN: 825 case MAT_USE_HASH_TABLE: 826 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 827 break; 828 case MAT_IGNORE_OFF_PROC_ENTRIES: 829 a->donotstash = flg; 830 break; 831 case MAT_SYMMETRIC: 832 case MAT_STRUCTURALLY_SYMMETRIC: 833 case MAT_HERMITIAN: 834 case MAT_SYMMETRY_ETERNAL: 835 case MAT_IGNORE_LOWER_TRIANGULAR: 836 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 837 break; 838 default: 839 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 840 } 841 PetscFunctionReturn(0); 842 } 843 844 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatDiagonalScale_MPIDense" 847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 848 { 849 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 850 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 851 PetscScalar *l,*r,x,*v; 852 PetscErrorCode ierr; 853 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 854 855 PetscFunctionBegin; 856 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 857 if (ll) { 858 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 859 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 860 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 861 for (i=0; i<m; i++) { 862 x = l[i]; 863 v = mat->v + i; 864 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 865 } 866 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 867 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 868 } 869 if (rr) { 870 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 871 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 872 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 875 for (i=0; i<n; i++) { 876 x = r[i]; 877 v = mat->v + i*m; 878 for (j=0; j<m; j++) { (*v++) *= x;} 879 } 880 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 881 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatNorm_MPIDense" 888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 889 { 890 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 891 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 892 PetscErrorCode ierr; 893 PetscInt i,j; 894 PetscReal sum = 0.0; 895 PetscScalar *v = mat->v; 896 897 PetscFunctionBegin; 898 if (mdn->size == 1) { 899 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 900 } else { 901 if (type == NORM_FROBENIUS) { 902 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; 
i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar
alpha,Mat X,MatStructure str) 998 { 999 PetscErrorCode ierr; 1000 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1001 1002 PetscFunctionBegin; 1003 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatConjugate_MPIDense" 1009 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1010 { 1011 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1012 PetscErrorCode ierr; 1013 1014 PetscFunctionBegin; 1015 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatRealPart_MPIDense" 1021 PetscErrorCode MatRealPart_MPIDense(Mat A) 1022 { 1023 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1024 PetscErrorCode ierr; 1025 1026 PetscFunctionBegin; 1027 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1033 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1034 { 1035 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1036 PetscErrorCode ierr; 1037 1038 PetscFunctionBegin; 1039 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1040 PetscFunctionReturn(0); 1041 } 1042 1043 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1046 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1047 { 1048 PetscErrorCode ierr; 1049 PetscInt i,n; 1050 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1051 PetscReal *work; 1052 1053 PetscFunctionBegin; 1054 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 1055 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1056 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1057 if (type == NORM_2) { 1058 for (i=0; i<n; i++) work[i] *= work[i]; 1059 } 1060 if (type == NORM_INFINITY) { 1061 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1062 } else { 1063 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1064 } 1065 ierr = PetscFree(work);CHKERRQ(ierr); 1066 if (type == NORM_2) { 1067 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1068 } 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatSetRandom_MPIDense" 1074 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1075 { 1076 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1077 PetscErrorCode ierr; 1078 PetscScalar *a; 1079 PetscInt m,n,i; 1080 1081 PetscFunctionBegin; 1082 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1083 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1084 for (i=0; i<m*n; i++) { 1085 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1086 } 1087 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 /* -------------------------------------------------------------------*/ 1092 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1093 MatGetRow_MPIDense, 1094 MatRestoreRow_MPIDense, 1095 MatMult_MPIDense, 1096 /* 4*/ MatMultAdd_MPIDense, 1097 MatMultTranspose_MPIDense, 1098 MatMultTransposeAdd_MPIDense, 1099 0, 1100 0, 1101 0, 1102 /*10*/ 0, 1103 0, 1104 0, 1105 0, 1106 MatTranspose_MPIDense, 1107 /*15*/ MatGetInfo_MPIDense, 1108 MatEqual_MPIDense, 1109 MatGetDiagonal_MPIDense, 1110 MatDiagonalScale_MPIDense, 1111 MatNorm_MPIDense, 1112 /*20*/ MatAssemblyBegin_MPIDense, 1113 MatAssemblyEnd_MPIDense, 1114 MatSetOption_MPIDense, 1115 
MatZeroEntries_MPIDense, 1116 /*24*/ MatZeroRows_MPIDense, 1117 0, 1118 0, 1119 0, 1120 0, 1121 /*29*/ MatSetUp_MPIDense, 1122 0, 1123 0, 1124 0, 1125 0, 1126 /*34*/ MatDuplicate_MPIDense, 1127 0, 1128 0, 1129 0, 1130 0, 1131 /*39*/ MatAXPY_MPIDense, 1132 MatGetSubMatrices_MPIDense, 1133 0, 1134 MatGetValues_MPIDense, 1135 0, 1136 /*44*/ 0, 1137 MatScale_MPIDense, 1138 0, 1139 0, 1140 0, 1141 /*49*/ MatSetRandom_MPIDense, 1142 0, 1143 0, 1144 0, 1145 0, 1146 /*54*/ 0, 1147 0, 1148 0, 1149 0, 1150 0, 1151 /*59*/ MatGetSubMatrix_MPIDense, 1152 MatDestroy_MPIDense, 1153 MatView_MPIDense, 1154 0, 1155 0, 1156 /*64*/ 0, 1157 0, 1158 0, 1159 0, 1160 0, 1161 /*69*/ 0, 1162 0, 1163 0, 1164 0, 1165 0, 1166 /*74*/ 0, 1167 0, 1168 0, 1169 0, 1170 0, 1171 /*79*/ 0, 1172 0, 1173 0, 1174 0, 1175 /*83*/ MatLoad_MPIDense, 1176 0, 1177 0, 1178 0, 1179 0, 1180 0, 1181 /*89*/ 1182 0, 1183 0, 1184 0, 1185 0, 1186 0, 1187 /*94*/ 0, 1188 0, 1189 0, 1190 0, 1191 0, 1192 /*99*/ 0, 1193 0, 1194 0, 1195 MatConjugate_MPIDense, 1196 0, 1197 /*104*/0, 1198 MatRealPart_MPIDense, 1199 MatImaginaryPart_MPIDense, 1200 0, 1201 0, 1202 /*109*/0, 1203 0, 1204 0, 1205 0, 1206 0, 1207 /*114*/0, 1208 0, 1209 0, 1210 0, 1211 0, 1212 /*119*/0, 1213 0, 1214 0, 1215 0, 1216 0, 1217 /*124*/0, 1218 MatGetColumnNorms_MPIDense 1219 }; 1220 1221 EXTERN_C_BEGIN 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1224 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1225 { 1226 Mat_MPIDense *a; 1227 PetscErrorCode ierr; 1228 1229 PetscFunctionBegin; 1230 mat->preallocated = PETSC_TRUE; 1231 /* Note: For now, when data is specified above, this assumes the user correctly 1232 allocates the local dense storage space. We should add error checking. 
*/ 1233 1234 a = (Mat_MPIDense*)mat->data; 1235 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1236 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1237 a->nvec = mat->cmap->n; 1238 1239 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1240 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1241 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1242 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1243 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 EXTERN_C_END 1247 1248 EXTERN_C_BEGIN 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatCreate_MPIDense" 1251 PetscErrorCode MatCreate_MPIDense(Mat mat) 1252 { 1253 Mat_MPIDense *a; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1258 mat->data = (void*)a; 1259 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1260 1261 mat->insertmode = NOT_SET_VALUES; 1262 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1263 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1264 1265 /* build cache for off array entries formed */ 1266 a->donotstash = PETSC_FALSE; 1267 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1268 1269 /* stuff used for matrix vector multiply */ 1270 a->lvec = 0; 1271 a->Mvctx = 0; 1272 a->roworiented = PETSC_TRUE; 1273 1274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1276 1277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1278 "MatGetDiagonalBlock_MPIDense", 1279 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1281 "MatMPIDenseSetPreallocation_MPIDense", 1282 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1284 "MatMatMult_MPIAIJ_MPIDense", 1285 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1287 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1288 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1290 "MatMatMultNumeric_MPIAIJ_MPIDense", 1291 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1292 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1293 1294 PetscFunctionReturn(0); 1295 } 1296 EXTERN_C_END 1297 1298 /*MC 1299 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1300 1301 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1302 and MATMPIDENSE otherwise. 1303 1304 Options Database Keys: 1305 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
1377 1378 Level: intermediate 1379 1380 .keywords: matrix,dense, parallel 1381 1382 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1383 @*/ 1384 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1385 { 1386 PetscErrorCode ierr; 1387 PetscMPIInt size; 1388 1389 PetscFunctionBegin; 1390 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1391 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1392 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1393 if (size > 1) { 1394 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1395 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1396 } else { 1397 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1398 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatDuplicate_MPIDense" 1405 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1406 { 1407 Mat mat; 1408 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 *newmat = 0; 1413 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1414 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1415 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1416 a = (Mat_MPIDense*)mat->data; 1417 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1418 1419 mat->factortype = A->factortype; 1420 mat->assembled = PETSC_TRUE; 1421 mat->preallocated = PETSC_TRUE; 1422 1423 a->size = oldmat->size; 1424 a->rank = oldmat->rank; 1425 mat->insertmode = NOT_SET_VALUES; 1426 a->nvec = oldmat->nvec; 1427 a->donotstash = oldmat->donotstash; 1428 1429 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1430 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1431 1432 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1433 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1434 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1435 1436 *newmat = mat; 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1442 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1443 { 1444 PetscErrorCode ierr; 1445 PetscMPIInt rank,size; 1446 PetscInt *rowners,i,m,nz,j; 1447 PetscScalar *array,*vals,*vals_ptr; 1448 1449 PetscFunctionBegin; 1450 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1451 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1452 1453 /* determine ownership of all rows */ 1454 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1455 else m = newmat->rmap->n; 1456 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1457 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1458 rowners[0] = 0; 1459 for (i=2; i<=size; i++) { 1460 rowners[i] += rowners[i-1]; 1461 } 1462 1463 if (!sizesset) { 1464 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1465 } 1466 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1467 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1468 1469 if (!rank) { 1470 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1471 1472 /* read in my part of the matrix numerical values */ 1473 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1474 1475 /* insert into matrix-by row (this is why cannot directly 
read into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot directly read into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

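/*
   Usage sketch (illustrative only): reading a matrix back is done through MatLoad()
   with a binary viewer; the matrix type must be set to MATDENSE (or MATMPIDENSE)
   beforehand so that MatLoad_MPIDense() below is dispatched.  The file name is a placeholder.

      Mat         B;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetType(B,MATDENSE);CHKERRQ(ierr);
      ierr = MatLoad(B,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/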
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
  else m = PetscMPIIntCast(newmat->rmap->n);
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /*
read in my part of the matrix numerical values */ 1654 nz = procsnz[0]; 1655 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1656 1657 /* insert into matrix */ 1658 jj = rstart; 1659 smycols = mycols; 1660 svals = vals; 1661 for (i=0; i<m; i++) { 1662 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1663 smycols += ourlens[i]; 1664 svals += ourlens[i]; 1665 jj++; 1666 } 1667 1668 /* read in other processors and ship out */ 1669 for (i=1; i<size; i++) { 1670 nz = procsnz[i]; 1671 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1672 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1673 } 1674 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1675 } else { 1676 /* receive numeric values */ 1677 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1678 1679 /* receive message of values*/ 1680 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1681 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1682 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1683 1684 /* insert into matrix */ 1685 jj = rstart; 1686 smycols = mycols; 1687 svals = vals; 1688 for (i=0; i<m; i++) { 1689 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1690 smycols += ourlens[i]; 1691 svals += ourlens[i]; 1692 jj++; 1693 } 1694 } 1695 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 1696 ierr = PetscFree(vals);CHKERRQ(ierr); 1697 ierr = PetscFree(mycols);CHKERRQ(ierr); 1698 ierr = PetscFree(rowners);CHKERRQ(ierr); 1699 1700 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatEqual_MPIDense" 1707 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1708 { 1709 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1710 Mat a,b; 1711 PetscBool flg; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 a = matA->A; 1716 b = matB->A; 1717 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1718 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 1722
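/*
   End-to-end usage sketch (illustrative only): creating a parallel dense matrix with
   MatCreateDense(), filling the locally owned rows, and applying it.  The size and
   the entry formula are placeholders.

      Mat      A;
      Vec      x,y;
      PetscInt i,j,rstart,rend,N = 100;

      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        for (j=0; j<N; j++) {
          PetscScalar v = 1.0/(i + j + 1);
          ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/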