/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

    MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
          matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.   A - the Seq or MPI dense matrix

    Output Parameter:
.   B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else {
    *a = B;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
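/*
   Usage sketch for MatDenseGetLocalMatrix() defined above (illustrative only,
   not compiled; error handling abbreviated).  Because it works for both
   MATSEQDENSE and MATMPIDENSE, code that needs the raw local block can be
   written uniformly:

     Mat Aloc;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     (* Aloc is the sequential dense matrix holding this process's rows;
        it is a borrowed reference, so do NOT destroy it *)
*/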
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
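/*
   Usage sketch for MatDenseGetArray() on a parallel dense matrix
   (illustrative only; A is assumed assembled).  The returned array is the
   column-major storage of the local block: all N global columns of the
   locally owned rows.

     PetscScalar *v;
     PetscInt    m,N;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
     (* entry (local row i, global column j) is v[i + j*m] when the leading
        dimension equals m, as it does for storage allocated by PETSc *)
     ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
*/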
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only submatrices with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
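/*
   Sketch of the set-values/assembly cycle that MatAssemblyBegin_MPIDense()
   above and MatAssemblyEnd_MPIDense() below implement (illustrative only).
   Off-process entries are cached in the stash by MatSetValues() and
   communicated during assembly:

     PetscInt    i = 0,j = 0;     (* global indices, possibly off-process *)
     PetscScalar val = 1.0;
     ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/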
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
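/*
   Usage sketch for MatZeroRows() on a parallel dense matrix (illustrative
   only), e.g. for Dirichlet boundary conditions: zero the listed rows, put
   diag on their diagonal entries, and fix up the right-hand side b from a
   solution vector x:

     PetscInt rows[] = {0,5};     (* global row numbers *)
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   Note the restrictions checked in the routine below: the matrix must be
   square with identical row and column ownership.
*/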
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
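/*
   Usage sketch for the multiply routines (illustrative only).  MatGetVecs()
   yields vectors whose parallel layouts match the matrix, which is what the
   scatter in MatMult_MPIDense() above expects:

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr); (* x ~ columns, y ~ rows *)
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/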
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
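/*
   Usage sketch for MatGetDiagonal() (illustrative only).  The vector must be
   conforming: the routine above requires its global size to equal the global
   number of rows, so creating it from the matrix is the safe choice:

     Vec d;
     ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr); (* row layout *)
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/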
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
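/*
   Usage sketch for storing a parallel dense matrix in a binary file
   (illustrative only; "matrix.dat" is a placeholder).  As the error message
   in MatView_MPIDense_Binary() above notes, the viewer must be switched to
   native format first:

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/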
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIDense so that the correct type can be printed out by
         PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  PetscFunctionReturn(0);
}
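/*
   Usage sketch for MatGetInfo(), which MatGetInfo_MPIDense() below implements
   for this type (illustrative only; the %g/double casts are one portable way
   to print the MatInfo fields):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, memory %g\n",
                        (double)info.nz_used,(double)info.memory);CHKERRQ(ierr);
*/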
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.",s2a,s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.",s3a,s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
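/*
   Usage sketch for MatDiagonalScale() (illustrative only): computes
   diag(ll)*A*diag(rr).  The routine above checks that the vector layouts
   conform to the matrix, so creating them from the matrix is the reliable
   approach:

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr); (* r ~ columns, l ~ rows *)
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/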
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
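/*
   Usage sketch for MatTranspose() (illustrative only).  Out-of-place creates
   a new matrix; as checked in the routine above, in-place reuse
   (MatTranspose(A,MAT_REUSE_MATRIX,&A)) is supported only for square
   matrices:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     (* ... use At ... *)
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/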
static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
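/*
   Usage sketch for filling a dense matrix with random entries via the
   MatSetRandom() implementation above (illustrative only):

     PetscRandom rctx;
     ierr = PetscRandomCreate(((PetscObject)A)->comm,&rctx);CHKERRQ(ierr);
     ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
     ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
     ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/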
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                               /* 4*/  MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       0,
                                       0,
                                       0,
                               /*10*/  0,
                                       0,
                                       0,
                                       0,
                                       MatTranspose_MPIDense,
                               /*15*/  MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                               /*20*/  MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                               /*24*/  MatZeroRows_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*29*/  MatSetUp_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*34*/  MatDuplicate_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*39*/  MatAXPY_MPIDense,
                                       MatGetSubMatrices_MPIDense,
                                       0,
                                       MatGetValues_MPIDense,
                                       0,
                               /*44*/  0,
                                       MatScale_MPIDense,
                                       0,
                                       0,
                                       0,
                               /*49*/  MatSetRandom_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*54*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*59*/  MatGetSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       0,
                                       0,
                               /*64*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*69*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*74*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*79*/  0,
                                       0,
                                       0,
                                       0,
                               /*83*/  MatLoad_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*89*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*94*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*99*/  0,
                                       0,
                                       0,
                                       MatConjugate_MPIDense,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       0,
                                       0,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       MatGetColumnNorms_MPIDense
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
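/*
   Sketch of supplying user-allocated storage through the preallocation
   routine above (illustrative only; assumes the local row count was set
   explicitly with MatSetSizes()).  The array must hold the local rows of ALL
   global columns in column-major (Fortran) order, and, as the note above
   says, its size is not error checked:

     PetscInt    m,N;
     PetscScalar *data;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
     (* data must remain valid for the life of the matrix and is owned by
        the caller *)
*/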
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  /* build cache for off-processor entries */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
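/*
   Usage sketch for MatCreateDense() above (illustrative only): create a
   100 x 100 dense matrix spread across the communicator, letting PETSc pick
   the local sizes and allocate the storage:

     Mat B;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&B);CHKERRQ(ierr);
     (* ... MatSetValues()/assembly, use the matrix ... *)
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/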
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
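/*
   Usage sketch for loading a parallel dense matrix from a PETSc binary file
   (illustrative only; "matrix.dat" is a placeholder).  Both the dense
   on-disk format written with PETSC_VIEWER_NATIVE and the standard sparse
   format are handled by MatLoad_MPIDense() below:

     Mat         B;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/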
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
  else m = PetscMPIIntCast(newmat->rmap->n);
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}