1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) { 34 *B = mat->A; 35 } else { 36 *B = A; 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatGetRow_MPIDense" 43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 48 49 PetscFunctionBegin; 50 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 51 lrow = row - rstart; 52 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatRestoreRow_MPIDense" 58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 59 { 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 64 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 71 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 72 { 73 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 74 PetscErrorCode ierr; 75 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 76 PetscScalar *array; 77 MPI_Comm comm; 78 Mat B; 79 80 PetscFunctionBegin; 81 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 82 83 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 84 if (!B) { 85 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 86 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 87 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 88 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 89 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 90 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 91 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 93 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 94 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 95 *a = B; 96 ierr = MatDestroy(&B);CHKERRQ(ierr); 97 } else { 98 *a = B; 99 } 100 PetscFunctionReturn(0); 101 } 102 EXTERN_C_END 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatSetValues_MPIDense" 106 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 107 { 108 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 109 PetscErrorCode ierr; 110 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 111 
PetscBool roworiented = A->roworiented; 112 113 PetscFunctionBegin; 114 if (v) PetscValidScalarPointer(v,6); 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 mat->assembled = PETSC_FALSE; 132 if (roworiented) { 133 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 134 } else { 135 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 136 } 137 } 138 } 139 } 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatGetValues_MPIDense" 145 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 146 { 147 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 148 PetscErrorCode ierr; 149 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 150 151 PetscFunctionBegin; 152 for (i=0; i<m; i++) { 153 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 154 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 155 if (idxm[i] >= rstart && idxm[i] < rend) { 156 row = idxm[i] - rstart; 157 for (j=0; j<n; j++) { 158 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 159 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 161 } 162 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 163 } 164 PetscFunctionReturn(0); 165 } 166 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatDenseGetArray_MPIDense" 169 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 170 { 171 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 172 PetscErrorCode ierr; 173 174 PetscFunctionBegin; 175 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 181 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 182 { 183 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 184 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 185 PetscErrorCode ierr; 186 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 187 const PetscInt *irow,*icol; 188 PetscScalar *av,*bv,*v = lmat->v; 189 Mat newmat; 190 IS iscol_local; 191 192 PetscFunctionBegin; 193 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 194 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 195 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 196 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 198 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 199 200 /* No parallel redistribution currently supported! Should really check each index set 201 to comfirm that it is OK. ... 
Currently supports only submatrix same partitioning as 202 original matrix! */ 203 204 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 205 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 206 207 /* Check submatrix call */ 208 if (scall == MAT_REUSE_MATRIX) { 209 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 210 /* Really need to test rows and column sizes! */ 211 newmat = *B; 212 } else { 213 /* Create and fill new matrix */ 214 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 215 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 216 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 217 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 218 } 219 220 /* Now extract the data pointers and do the copy, column at a time */ 221 newmatd = (Mat_MPIDense*)newmat->data; 222 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 223 224 for (i=0; i<Ncols; i++) { 225 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 226 for (j=0; j<nrows; j++) { 227 *bv++ = av[irow[j] - rstart]; 228 } 229 } 230 231 /* Assemble the matrices so that the correct flags are set */ 232 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 234 235 /* Free work space */ 236 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 237 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 238 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 239 *B = newmat; 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 245 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 246 { 247 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 257 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 258 { 259 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 260 MPI_Comm comm = ((PetscObject)mat)->comm; 261 PetscErrorCode ierr; 262 PetscInt nstash,reallocs; 263 InsertMode addv; 264 265 PetscFunctionBegin; 266 /* make sure all processors are either in INSERTMODE or ADDMODE */ 267 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 268 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 269 mat->insertmode = addv; /* in case this processor had no cache */ 270 271 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 272 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 273 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 279 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 280 { 281 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 282 PetscErrorCode ierr; 283 PetscInt i,*row,*col,flg,j,rstart,ncols; 284 PetscMPIInt n; 285 PetscScalar *val; 286 InsertMode addv=mat->insertmode; 287 288 PetscFunctionBegin; 289 /* wait on receives */ 290 while (1) { 291 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 292 if (!flg) break; 293 294 for (i=0; 
i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting
index in svalues for stuff going to 383 the ith processor 384 */ 385 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 386 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 387 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 388 starts[0] = 0; 389 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 390 for (i=0; i<N; i++) { 391 svalues[starts[owner[i]]++] = rows[i]; 392 } 393 394 starts[0] = 0; 395 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 396 count = 0; 397 for (i=0; i<size; i++) { 398 if (nprocs[2*i+1]) { 399 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 400 } 401 } 402 ierr = PetscFree(starts);CHKERRQ(ierr); 403 404 base = owners[rank]; 405 406 /* wait on receives */ 407 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 408 count = nrecvs; 409 slen = 0; 410 while (count) { 411 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 412 /* unpack receives into our local space */ 413 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 414 source[imdex] = recv_status.MPI_SOURCE; 415 lens[imdex] = n; 416 slen += n; 417 count--; 418 } 419 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 420 421 /* move the data into the send scatter */ 422 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 423 count = 0; 424 for (i=0; i<nrecvs; i++) { 425 values = rvalues + i*nmax; 426 for (j=0; j<lens[i]; j++) { 427 lrows[count++] = values[j] - base; 428 } 429 } 430 ierr = PetscFree(rvalues);CHKERRQ(ierr); 431 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 432 ierr = PetscFree(owner);CHKERRQ(ierr); 433 ierr = PetscFree(nprocs);CHKERRQ(ierr); 434 435 /* fix right hand side if needed */ 436 if (x && b) { 437 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 438 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 439 for (i=0; i<slen; i++) { 440 bb[lrows[i]] = diag*xx[lrows[i]]; 441 } 442 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 443 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 444 } 445 446 /* actually zap the local rows */ 447 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 448 if (diag != 0.0) { 449 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 450 PetscInt m = ll->lda, i; 451 452 for (i=0; i<slen; i++) { 453 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 454 } 455 } 456 ierr = PetscFree(lrows);CHKERRQ(ierr); 457 458 /* wait on sends */ 459 if (nsends) { 460 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 461 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 462 ierr = PetscFree(send_status);CHKERRQ(ierr); 463 } 464 ierr = PetscFree(send_waits);CHKERRQ(ierr); 465 ierr = PetscFree(svalues);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMult_MPIDense" 471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 472 { 473 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatMultAdd_MPIDense" 485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_MPIDense *mdn = 
(Mat_MPIDense*)mat->data; 488 PetscErrorCode ierr; 489 490 PetscFunctionBegin; 491 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatMultTranspose_MPIDense" 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 PetscErrorCode ierr; 503 PetscScalar zero = 0.0; 504 505 PetscFunctionBegin; 506 ierr = VecSet(yy,zero);CHKERRQ(ierr); 507 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 508 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 516 { 517 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 518 PetscErrorCode ierr; 519 520 PetscFunctionBegin; 521 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 522 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 523 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatGetDiagonal_MPIDense" 530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 534 PetscErrorCode ierr; 535 PetscInt len,i,n,m = A->rmap->n,radd; 536 PetscScalar *x,zero = 0.0; 537 538 PetscFunctionBegin; 539 ierr = VecSet(v,zero);CHKERRQ(ierr); 540 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 541 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 542 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 543 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 544 radd = A->rmap->rstart*m; 545 for (i=0; i<len; i++) { 546 x[i] = aloc->v[radd + i*m + i]; 547 } 548 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatDestroy_MPIDense" 554 PetscErrorCode MatDestroy_MPIDense(Mat mat) 555 { 556 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 557 PetscErrorCode ierr; 558 559 PetscFunctionBegin; 560 #if defined(PETSC_USE_LOG) 561 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 562 #endif 563 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 564 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 565 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 566 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 567 568 ierr = PetscFree(mat->data);CHKERRQ(ierr); 569 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 571 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 574 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatView_MPIDense_Binary" 580 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 581 { 582 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 583 PetscErrorCode ierr; 584 PetscViewerFormat format; 585 int fd; 586 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 587 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 588 PetscScalar *work,*v,*vv; 589 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 590 591 PetscFunctionBegin; 592 if (mdn->size == 1) { 593 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 594 } else { 595 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 596 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 597 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 598 599 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 600 if (format == PETSC_VIEWER_NATIVE) { 601 602 if (!rank) { 603 /* store the matrix as a dense matrix */ 604 header[0] = MAT_FILE_CLASSID; 605 header[1] = mat->rmap->N; 606 header[2] = N; 607 header[3] = MATRIX_BINARY_FORMAT_DENSE; 608 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 609 610 /* get largest work array needed for transposing array */ 611 mmax = mat->rmap->n; 612 for (i=1; i<size; i++) { 613 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 614 } 615 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 616 617 /* write out local array, by rows */ 618 m = mat->rmap->n; 619 v = a->v; 620 for (j=0; j<N; j++) { 621 for (i=0; i<m; i++) { 622 work[j + i*N] = *v++; 623 } 624 } 625 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 626 /* get largest work array to receive messages from other processes, excludes process zero */ 627 mmax = 0; 628 for (i=1; i<size; i++) { 629 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 630 } 631 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 632 for (k = 1; k < size; k++) { 633 v = vv; 634 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 635 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 636 637 for (j = 0; j < N; j++) { 638 for (i = 0; i < m; i++) { 639 work[j + i*N] = *v++; 640 } 641 } 642 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 643 } 644 ierr = PetscFree(work);CHKERRQ(ierr); 645 ierr = PetscFree(vv);CHKERRQ(ierr); 646 } else { 647 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 648 } 649 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 650 } 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 656 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 657 { 658 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 659 PetscErrorCode ierr; 660 PetscMPIInt size = mdn->size,rank = mdn->rank; 661 PetscViewerType vtype; 662 PetscBool iascii,isdraw; 663 PetscViewer sviewer; 664 PetscViewerFormat format; 665 666 PetscFunctionBegin; 667 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 668 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 
669 if (iascii) { 670 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 671 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 672 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 673 MatInfo info; 674 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 675 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 676 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 677 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 678 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 679 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } else if (format == PETSC_VIEWER_ASCII_INFO) { 682 PetscFunctionReturn(0); 683 } 684 } else if (isdraw) { 685 PetscDraw draw; 686 PetscBool isnull; 687 688 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 689 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 690 if (isnull) PetscFunctionReturn(0); 691 } 692 693 if (size == 1) { 694 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 695 } else { 696 /* assemble the entire matrix onto first processor. */ 697 Mat A; 698 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 699 PetscInt *cols; 700 PetscScalar *vals; 701 702 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 703 if (!rank) { 704 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 705 } else { 706 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 707 } 708 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 709 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 710 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr); 711 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 712 713 /* Copy the matrix ... 
This isn't the most efficient means, 714 but it's quick for now */ 715 A->insertmode = INSERT_VALUES; 716 row = mat->rmap->rstart; m = mdn->A->rmap->n; 717 for (i=0; i<m; i++) { 718 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 719 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 720 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 721 row++; 722 } 723 724 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 725 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 726 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 727 if (!rank) { 728 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 729 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 730 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 731 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 732 } 733 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 734 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 735 ierr = MatDestroy(&A);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIDense" 742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 743 { 744 PetscErrorCode ierr; 745 PetscBool iascii,isbinary,isdraw,issocket; 746 747 PetscFunctionBegin; 748 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 749 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 752 753 if (iascii || issocket || isdraw) { 754 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 755 } else if (isbinary) { 756 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 757 } 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNCT__ 762 #define __FUNCT__ "MatGetInfo_MPIDense" 763 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 764 { 765 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 766 Mat mdn = mat->A; 767 PetscErrorCode ierr; 768 PetscReal isend[5],irecv[5]; 769 770 PetscFunctionBegin; 771 info->block_size = 1.0; 772 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 773 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 774 isend[3] = info->memory; isend[4] = info->mallocs; 775 if (flag == MAT_LOCAL) { 776 info->nz_used = isend[0]; 777 info->nz_allocated = isend[1]; 778 info->nz_unneeded = isend[2]; 779 info->memory = isend[3]; 780 info->mallocs = isend[4]; 781 } else if (flag == MAT_GLOBAL_MAX) { 782 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 783 info->nz_used = irecv[0]; 784 info->nz_allocated = irecv[1]; 785 info->nz_unneeded = irecv[2]; 786 info->memory = irecv[3]; 787 info->mallocs = irecv[4]; 788 } else if (flag == MAT_GLOBAL_SUM) { 789 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 790 info->nz_used = irecv[0]; 791 info->nz_allocated = irecv[1]; 792 info->nz_unneeded = irecv[2]; 793 info->memory = irecv[3]; 794 info->mallocs = irecv[4]; 795 } 796 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 797 
info->fill_ratio_needed = 0; 798 info->factor_mallocs = 0; 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatSetOption_MPIDense" 804 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 805 { 806 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 807 PetscErrorCode ierr; 808 809 PetscFunctionBegin; 810 switch (op) { 811 case MAT_NEW_NONZERO_LOCATIONS: 812 case MAT_NEW_NONZERO_LOCATION_ERR: 813 case MAT_NEW_NONZERO_ALLOCATION_ERR: 814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 815 break; 816 case MAT_ROW_ORIENTED: 817 a->roworiented = flg; 818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 819 break; 820 case MAT_NEW_DIAGONALS: 821 case MAT_KEEP_NONZERO_PATTERN: 822 case MAT_USE_HASH_TABLE: 823 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 824 break; 825 case MAT_IGNORE_OFF_PROC_ENTRIES: 826 a->donotstash = flg; 827 break; 828 case MAT_SYMMETRIC: 829 case MAT_STRUCTURALLY_SYMMETRIC: 830 case MAT_HERMITIAN: 831 case MAT_SYMMETRY_ETERNAL: 832 case MAT_IGNORE_LOWER_TRIANGULAR: 833 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 834 break; 835 default: 836 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 837 } 838 PetscFunctionReturn(0); 839 } 840 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "MatDiagonalScale_MPIDense" 844 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 845 { 846 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 847 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 848 PetscScalar *l,*r,x,*v; 849 PetscErrorCode ierr; 850 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 851 852 PetscFunctionBegin; 853 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 854 if (ll) { 855 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 856 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 857 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 858 for (i=0; i<m; i++) { 859 x = l[i]; 860 v = mat->v + i; 861 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 862 } 863 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 864 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 865 } 866 if (rr) { 867 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 868 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 869 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 872 for (i=0; i<n; i++) { 873 x = r[i]; 874 v = mat->v + i*m; 875 for (j=0; j<m; j++) { (*v++) *= x;} 876 } 877 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 878 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 879 } 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatNorm_MPIDense" 885 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 886 { 887 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 888 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 889 PetscErrorCode ierr; 890 PetscInt i,j; 891 PetscReal sum = 0.0; 892 PetscScalar *v = mat->v; 893 894 PetscFunctionBegin; 895 if (mdn->size == 1) { 896 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 897 } else { 898 if (type == NORM_FROBENIUS) { 899 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 900 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 901 } 902 ierr = 
MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

PetscFunctionBegin; 996 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 997 PetscFunctionReturn(0); 998 } 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatConjugate_MPIDense" 1002 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1003 { 1004 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1005 PetscErrorCode ierr; 1006 1007 PetscFunctionBegin; 1008 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatRealPart_MPIDense" 1014 PetscErrorCode MatRealPart_MPIDense(Mat A) 1015 { 1016 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1026 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1027 { 1028 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1029 PetscErrorCode ierr; 1030 1031 PetscFunctionBegin; 1032 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1033 PetscFunctionReturn(0); 1034 } 1035 1036 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1039 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1040 { 1041 PetscErrorCode ierr; 1042 PetscInt i,n; 1043 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1044 PetscReal *work; 1045 1046 PetscFunctionBegin; 1047 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 1048 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1049 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1050 if (type == NORM_2) { 1051 for (i=0; i<n; i++) work[i] *= work[i]; 1052 } 1053 if (type == NORM_INFINITY) { 1054 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1055 } else { 1056 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1057 } 1058 ierr = PetscFree(work);CHKERRQ(ierr); 1059 if (type == NORM_2) { 1060 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatSetRandom_MPIDense" 1067 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1068 { 1069 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1070 PetscErrorCode ierr; 1071 PetscScalar *a; 1072 PetscInt m,n,i; 1073 1074 PetscFunctionBegin; 1075 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1076 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1077 for (i=0; i<m*n; i++) { 1078 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1079 } 1080 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 /* -------------------------------------------------------------------*/ 1085 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1086 MatGetRow_MPIDense, 1087 MatRestoreRow_MPIDense, 1088 MatMult_MPIDense, 1089 /* 4*/ MatMultAdd_MPIDense, 1090 MatMultTranspose_MPIDense, 1091 MatMultTransposeAdd_MPIDense, 1092 0, 1093 0, 1094 0, 1095 /* 10*/ 0, 1096 0, 1097 0, 1098 0, 1099 MatTranspose_MPIDense, 1100 /* 15*/ MatGetInfo_MPIDense, 1101 MatEqual_MPIDense, 1102 MatGetDiagonal_MPIDense, 1103 MatDiagonalScale_MPIDense, 1104 MatNorm_MPIDense, 1105 /* 20*/ MatAssemblyBegin_MPIDense, 1106 MatAssemblyEnd_MPIDense, 1107 MatSetOption_MPIDense, 1108 MatZeroEntries_MPIDense, 1109 /* 24*/ MatZeroRows_MPIDense, 1110 0, 1111 0, 1112 0, 1113 0, 1114 /* 29*/ MatSetUp_MPIDense, 1115 0, 1116 0, 1117 0, 1118 0, 1119 /* 
34*/ MatDuplicate_MPIDense, 1120 0, 1121 0, 1122 0, 1123 0, 1124 /* 39*/ MatAXPY_MPIDense, 1125 MatGetSubMatrices_MPIDense, 1126 0, 1127 MatGetValues_MPIDense, 1128 0, 1129 /* 44*/ 0, 1130 MatScale_MPIDense, 1131 0, 1132 0, 1133 0, 1134 /* 49*/ MatSetRandom_MPIDense, 1135 0, 1136 0, 1137 0, 1138 0, 1139 /* 54*/ 0, 1140 0, 1141 0, 1142 0, 1143 0, 1144 /* 59*/ MatGetSubMatrix_MPIDense, 1145 MatDestroy_MPIDense, 1146 MatView_MPIDense, 1147 0, 1148 0, 1149 /* 64*/ 0, 1150 0, 1151 0, 1152 0, 1153 0, 1154 /* 69*/ 0, 1155 0, 1156 0, 1157 0, 1158 0, 1159 /* 74*/ 0, 1160 0, 1161 0, 1162 0, 1163 0, 1164 /* 79*/ 0, 1165 0, 1166 0, 1167 0, 1168 /* 83*/ MatLoad_MPIDense, 1169 0, 1170 0, 1171 0, 1172 0, 1173 0, 1174 /* 89*/ 1175 0, 1176 0, 1177 0, 1178 0, 1179 0, 1180 /* 94*/ 0, 1181 0, 1182 0, 1183 0, 1184 0, 1185 /* 99*/ 0, 1186 0, 1187 0, 1188 MatConjugate_MPIDense, 1189 0, 1190 /*104*/ 0, 1191 MatRealPart_MPIDense, 1192 MatImaginaryPart_MPIDense, 1193 0, 1194 0, 1195 /*109*/ 0, 1196 0, 1197 0, 1198 0, 1199 0, 1200 /*114*/ 0, 1201 0, 1202 0, 1203 0, 1204 0, 1205 /*119*/ 0, 1206 0, 1207 0, 1208 0, 1209 0, 1210 /*124*/ 0, 1211 MatGetColumnNorms_MPIDense 1212 }; 1213 1214 EXTERN_C_BEGIN 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1217 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1218 { 1219 Mat_MPIDense *a; 1220 PetscErrorCode ierr; 1221 1222 PetscFunctionBegin; 1223 mat->preallocated = PETSC_TRUE; 1224 /* Note: For now, when data is specified above, this assumes the user correctly 1225 allocates the local dense storage space. We should add error checking. */ 1226 1227 a = (Mat_MPIDense*)mat->data; 1228 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1229 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1230 a->nvec = mat->cmap->n; 1231 1232 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1233 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1234 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1235 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1236 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 EXTERN_C_END 1240 1241 EXTERN_C_BEGIN 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatCreate_MPIDense" 1244 PetscErrorCode MatCreate_MPIDense(Mat mat) 1245 { 1246 Mat_MPIDense *a; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1251 mat->data = (void*)a; 1252 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1253 1254 mat->insertmode = NOT_SET_VALUES; 1255 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1256 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1257 1258 /* build cache for off array entries formed */ 1259 a->donotstash = PETSC_FALSE; 1260 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1261 1262 /* stuff used for matrix vector multiply */ 1263 a->lvec = 0; 1264 a->Mvctx = 0; 1265 a->roworiented = PETSC_TRUE; 1266 1267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1269 1270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1271 
"MatGetDiagonalBlock_MPIDense", 1272 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1274 "MatMPIDenseSetPreallocation_MPIDense", 1275 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1277 "MatMatMult_MPIAIJ_MPIDense", 1278 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1280 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1281 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1283 "MatMatMultNumeric_MPIAIJ_MPIDense", 1284 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1285 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 EXTERN_C_END 1289 1290 /*MC 1291 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1292 1293 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1294 and MATMPIDENSE otherwise. 1295 1296 Options Database Keys: 1297 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1298 1299 Level: beginner 1300 1301 1302 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1303 M*/ 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1307 /*@C 1308 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1309 1310 Not collective 1311 1312 Input Parameters: 1313 . A - the matrix 1314 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1315 to control all matrix memory allocation. 1316 1317 Notes: 1318 The dense format is fully compatible with standard Fortran 77 1319 storage by columns. 1320 1321 The data input variable is intended primarily for Fortran programmers 1322 who wish to allocate their own matrix memory space. Most users should 1323 set data=PETSC_NULL. 1324 1325 Level: intermediate 1326 1327 .keywords: matrix,dense, parallel 1328 1329 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1330 @*/ 1331 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1332 { 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatCreateDense" 1342 /*@C 1343 MatCreateDense - Creates a parallel matrix in dense format. 1344 1345 Collective on MPI_Comm 1346 1347 Input Parameters: 1348 + comm - MPI communicator 1349 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1350 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1351 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1352 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1353 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1354 to control all matrix memory allocation. 1355 1356 Output Parameter: 1357 . A - the matrix 1358 1359 Notes: 1360 The dense format is fully compatible with standard Fortran 77 1361 storage by columns. 
1362 1363 The data input variable is intended primarily for Fortran programmers 1364 who wish to allocate their own matrix memory space. Most users should 1365 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1366 1367 The user MUST specify either the local or global matrix dimensions 1368 (possibly both). 1369 1370 Level: intermediate 1371 1372 .keywords: matrix,dense, parallel 1373 1374 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1375 @*/ 1376 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1377 { 1378 PetscErrorCode ierr; 1379 PetscMPIInt size; 1380 1381 PetscFunctionBegin; 1382 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1383 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1384 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1385 if (size > 1) { 1386 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1387 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1388 } else { 1389 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1390 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatDuplicate_MPIDense" 1397 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1398 { 1399 Mat mat; 1400 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 *newmat = 0; 1405 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1406 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1407 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1408 a = (Mat_MPIDense*)mat->data; 1409 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1410 1411 mat->factortype = A->factortype; 1412 mat->assembled = PETSC_TRUE; 1413 mat->preallocated = PETSC_TRUE; 1414 1415 a->size = oldmat->size; 1416 a->rank = oldmat->rank; 1417 mat->insertmode = NOT_SET_VALUES; 1418 a->nvec = oldmat->nvec; 1419 a->donotstash = oldmat->donotstash; 1420 1421 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1422 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1423 1424 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1425 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1426 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1427 1428 *newmat = mat; 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1434 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1435 { 1436 PetscErrorCode ierr; 1437 PetscMPIInt rank,size; 1438 PetscInt *rowners,i,m,nz,j; 1439 PetscScalar *array,*vals,*vals_ptr; 1440 1441 PetscFunctionBegin; 1442 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1443 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1444 1445 /* determine ownership of all rows */ 1446 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1447 else m = newmat->rmap->n; 1448 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1449 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1450 rowners[0] = 0; 1451 for (i=2; i<=size; i++) { 1452 rowners[i] += rowners[i-1]; 1453 } 1454 1455 if (!sizesset) { 1456 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1457 } 1458 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1459 ierr = 
MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1460 1461 if (!rank) { 1462 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1463 1464 /* read in my part of the matrix numerical values */ 1465 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1466 1467 /* insert into matrix-by row (this is why cannot directly read into array */ 1468 vals_ptr = vals; 1469 for (i=0; i<m; i++) { 1470 for (j=0; j<N; j++) { 1471 array[i + j*m] = *vals_ptr++; 1472 } 1473 } 1474 1475 /* read in other processors and ship out */ 1476 for (i=1; i<size; i++) { 1477 nz = (rowners[i+1] - rowners[i])*N; 1478 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1479 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1480 } 1481 } else { 1482 /* receive numeric values */ 1483 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1484 1485 /* receive message of values*/ 1486 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1487 1488 /* insert into matrix-by row (this is why cannot directly read into array */ 1489 vals_ptr = vals; 1490 for (i=0; i<m; i++) { 1491 for (j=0; j<N; j++) { 1492 array[i + j*m] = *vals_ptr++; 1493 } 1494 } 1495 } 1496 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1497 ierr = PetscFree(rowners);CHKERRQ(ierr); 1498 ierr = PetscFree(vals);CHKERRQ(ierr); 1499 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1500 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatLoad_MPIDense" 1506 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1507 { 1508 PetscScalar *vals,*svals; 1509 MPI_Comm comm = ((PetscObject)viewer)->comm; 1510 MPI_Status status; 1511 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1512 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1513 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1514 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1515 int fd; 1516 PetscErrorCode ierr; 1517 1518 PetscFunctionBegin; 1519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1520 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1521 if (!rank) { 1522 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1523 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1524 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1525 } 1526 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1527 1528 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1529 M = header[1]; N = header[2]; nz = header[3]; 1530 1531 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1532 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1533 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1534 1535 /* If global sizes are set, check if they are consistent with that given in the file */ 1536 if (sizesset) { 1537 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1538 } 1539 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1540 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has 
(%d)",N,gcols); 1541 1542 /* 1543 Handle case where matrix is stored on disk as a dense matrix 1544 */ 1545 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1546 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 /* determine ownership of all rows */ 1551 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1552 else m = PetscMPIIntCast(newmat->rmap->n); 1553 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1554 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1555 rowners[0] = 0; 1556 for (i=2; i<=size; i++) { 1557 rowners[i] += rowners[i-1]; 1558 } 1559 rstart = rowners[rank]; 1560 rend = rowners[rank+1]; 1561 1562 /* distribute row lengths to all processors */ 1563 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1564 if (!rank) { 1565 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1566 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1567 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1568 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1569 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1570 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1571 } else { 1572 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1573 } 1574 1575 if (!rank) { 1576 /* calculate the number of nonzeros on each processor */ 1577 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1578 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1579 for (i=0; i<size; i++) { 1580 for (j=rowners[i]; j< rowners[i+1]; j++) { 1581 procsnz[i] += rowlengths[j]; 1582 } 1583 } 1584 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1585 1586 /* determine max buffer needed and allocate it */ 1587 maxnz = 0; 1588 for (i=0; i<size; i++) { 1589 maxnz = PetscMax(maxnz,procsnz[i]); 1590 } 1591 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1592 1593 /* read in my part of the matrix column indices */ 1594 nz = procsnz[0]; 1595 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1596 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1597 1598 /* read in every one elses and ship off */ 1599 for (i=1; i<size; i++) { 1600 nz = procsnz[i]; 1601 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1602 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1603 } 1604 ierr = PetscFree(cols);CHKERRQ(ierr); 1605 } else { 1606 /* determine buffer space needed for message */ 1607 nz = 0; 1608 for (i=0; i<m; i++) { 1609 nz += ourlens[i]; 1610 } 1611 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1612 1613 /* receive message of column indices*/ 1614 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1615 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1616 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1617 } 1618 1619 /* loop over local rows, determining number of off diagonal entries */ 1620 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1621 jj = 0; 1622 for (i=0; i<m; i++) { 1623 for (j=0; j<ourlens[i]; j++) { 1624 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1625 jj++; 1626 } 1627 } 1628 1629 /* create our matrix */ 1630 for (i=0; i<m; i++) { 1631 ourlens[i] -= offlens[i]; 1632 } 1633 
1634 if (!sizesset) { 1635 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1636 } 1637 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1638 for (i=0; i<m; i++) { 1639 ourlens[i] += offlens[i]; 1640 } 1641 1642 if (!rank) { 1643 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1644 1645 /* read in my part of the matrix numerical values */ 1646 nz = procsnz[0]; 1647 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1648 1649 /* insert into matrix */ 1650 jj = rstart; 1651 smycols = mycols; 1652 svals = vals; 1653 for (i=0; i<m; i++) { 1654 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1655 smycols += ourlens[i]; 1656 svals += ourlens[i]; 1657 jj++; 1658 } 1659 1660 /* read in other processors and ship out */ 1661 for (i=1; i<size; i++) { 1662 nz = procsnz[i]; 1663 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1664 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1665 } 1666 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1667 } else { 1668 /* receive numeric values */ 1669 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1670 1671 /* receive message of values*/ 1672 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1673 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1674 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1675 1676 /* insert into matrix */ 1677 jj = rstart; 1678 smycols = mycols; 1679 svals = vals; 1680 for (i=0; i<m; i++) { 1681 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1682 smycols += ourlens[i]; 1683 svals += ourlens[i]; 1684 jj++; 1685 } 1686 } 1687 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 1688 ierr = PetscFree(vals);CHKERRQ(ierr); 1689 ierr = PetscFree(mycols);CHKERRQ(ierr); 1690 ierr = PetscFree(rowners);CHKERRQ(ierr); 1691 1692 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatEqual_MPIDense" 1699 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1700 { 1701 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1702 Mat a,b; 1703 PetscBool flg; 1704 PetscErrorCode ierr; 1705 1706 PetscFunctionBegin; 1707 a = matA->A; 1708 b = matB->A; 1709 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1710 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1711 PetscFunctionReturn(0); 1712 } 1713 1714
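
/*
   Usage sketch (illustration only, not part of the implementation above): how an
   application might create, fill, and apply a parallel dense matrix with the routines
   defined in this file.  The sizes and values below are arbitrary examples; error
   handling follows the usual PETSc CHKERRQ() convention.

     #include <petscmat.h>

     Mat            A;
     Vec            x,y;
     PetscInt       i,rstart,rend,N = 10;
     PetscErrorCode ierr;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValue(A,i,i,2.0,INSERT_VALUES);CHKERRQ(ierr);        // locally owned entry
       ierr = MatSetValue(A,i,(i+1)%N,-1.0,INSERT_VALUES);CHKERRQ(ierr); // may be off-process; goes through the stash
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);                                // dispatches to MatMult_MPIDense()

     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/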