/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
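/*
   Example (illustrative sketch, not part of this file): after assembling a
   MATMPIDENSE matrix, MatDenseGetLocalMatrix() above exposes the per-process
   SeqDense block so it can be inspected or modified directly.

     Mat            A,Aloc;           assume A is an assembled MATMPIDENSE matrix
     PetscErrorCode ierr;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatView(Aloc,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/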
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
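/*
   Example (illustrative sketch): the array returned by MatDenseGetArray()
   holds the local rows in column-major (Fortran) order, so entry (i,j) of
   the local block lives at array[i + j*m], where m is the local row count.

     Mat            A;               assume A is an assembled MATMPIDENSE matrix
     PetscScalar    *array;
     PetscInt       m,N;
     PetscErrorCode ierr;

     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
     ... read or write array[i + j*m] for 0 <= i < m, 0 <= j < N ...
     ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
*/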
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
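/*
   Example (illustrative sketch): values set for rows owned by another
   process are cached in the stash handled above and only communicated
   during assembly, so every batch of MatSetValues() calls must be followed
   by the assembly pair before the matrix is used.

     Mat            A;                 assume A is a preallocated MATMPIDENSE matrix
     PetscInt       row = 0,col = 0;   row may be owned by another process
     PetscScalar    value = 1.0;
     PetscErrorCode ierr;

     ierr = MatSetValues(A,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/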
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
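/*
   Example (illustrative sketch): because each process stores its m x N row
   block as one SeqDense matrix, MatMult() above gathers the entire input
   vector into the local work vector lvec and then performs a single local
   dense multiply; no further communication is needed for the output.

     Mat            A;                assume A is an assembled MATMPIDENSE matrix
     Vec            x,y;              x sized to the columns, y to the rows of A
     PetscErrorCode ierr;

     ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);   each process now owns its part of y = A*x
*/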
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
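/*
   Example (illustrative sketch): the transpose products above are the dual
   of MatMult(): each process first forms the local product A_loc^T x into
   the full-length work vector lvec, and the reverse scatter with ADD_VALUES
   then sums the partial results from every process into the distributed
   output.

     Mat            A;                assume A is an assembled MATMPIDENSE matrix
     Vec            x,y;
     PetscErrorCode ierr;

     ierr = MatCreateVecs(A,&y,&x);CHKERRQ(ierr);    x matches the rows of A here
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMultTranspose(A,x,y);CHKERRQ(ierr);   y = A^T * x
*/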
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {
      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
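/*
   Example (illustrative sketch): saving a MATMPIDENSE matrix in binary form
   requires the NATIVE format, as enforced above. The file name "dense.dat"
   is only for illustration.

     Mat            A;                assume A is an assembled MATMPIDENSE matrix
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/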
"MatView_MPIDense_ASCIIorDraworSocket" 662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 663 { 664 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 665 PetscErrorCode ierr; 666 PetscMPIInt rank = mdn->rank; 667 PetscViewerType vtype; 668 PetscBool iascii,isdraw; 669 PetscViewer sviewer; 670 PetscViewerFormat format; 671 672 PetscFunctionBegin; 673 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 674 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 675 if (iascii) { 676 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 677 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 678 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 679 MatInfo info; 680 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 683 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 685 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } else if (format == PETSC_VIEWER_ASCII_INFO) { 688 PetscFunctionReturn(0); 689 } 690 } else if (isdraw) { 691 PetscDraw draw; 692 PetscBool isnull; 693 694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 696 if (isnull) PetscFunctionReturn(0); 697 } 698 699 { 700 /* assemble the entire matrix onto first processor. */ 701 Mat A; 702 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 703 PetscInt *cols; 704 PetscScalar *vals; 705 706 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 707 if (!rank) { 708 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 711 } 712 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 713 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 714 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 715 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 716 717 /* Copy the matrix ... 
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
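/*
   Example (illustrative sketch): querying global memory statistics through
   the handler above; MAT_GLOBAL_SUM reduces the per-process numbers with
   MPI_SUM, MAT_GLOBAL_MAX with MPI_MAX.

     Mat            A;                assume A is an assembled MATMPIDENSE matrix
     MatInfo        info;
     PetscErrorCode ierr;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"total memory %g\n",(double)info.memory);CHKERRQ(ierr);
*/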
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
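/*
   Example (illustrative sketch): in MatNorm_MPIDense() above, the 1-norm
   works because each process holds its rows of every column; the local
   per-column absolute sums reduced with MPI_SUM give the global column
   sums, and the norm is their maximum.

     Mat            A;                assume A is an assembled MATMPIDENSE matrix
     PetscReal      nrm1,nrmf;
     PetscErrorCode ierr;

     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          max column sum
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);  sqrt of global sum of squares
*/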
#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                               /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatTranspose_MPIDense,
                               /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                               /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                               /* 24*/ MatZeroRows_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 34*/ MatDuplicate_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPIDense,
                                       MatGetSubMatrices_MPIDense,
                                       0,
                                       MatGetValues_MPIDense,
                                       0,
                               /* 44*/ 0,
                                       MatScale_MPIDense,
                                       0,
                                       0,
                                       0,
                               /* 49*/ MatSetRandom_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 59*/ MatGetSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                               /* 83*/ MatLoad_MPIDense,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIDense,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       0,
                                       0,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       MatGetColumnNorms_MPIDense,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
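/*
   Example (illustrative sketch): the preallocation above shows the storage
   scheme: each process keeps one SeqDense block of size (local rows) x
   (global columns), so the owner of a row stores that row in full.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,50);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);   PETSc allocates the local block
*/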
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *array,*v_colwise;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,j,k,*rows,*cols;

  PetscFunctionBegin;
  ierr = PetscMalloc3(m*N,&v_colwise,m,&rows,N,&cols);CHKERRQ(ierr);
  ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
  /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
  k = 0;
  for (j=0; j<N; j++) {
    cols[j] = j;
    for (i=0; i<m; i++) {
      v_colwise[j*m+i] = array[k++];
    }
  }
  for (i=0; i<m; i++) {
    rows[i] = rstart + i;
  }
  ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);

  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_elemental);CHKERRQ(ierr);
  ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v_colwise,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree3(v_colwise,rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatHeaderReplace(A,mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
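/*
   Example (illustrative sketch): MatCreate_MPIDense() is invoked through the
   type registry, so user code never calls it directly; selecting the type is
   enough.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);   dispatches to MatCreate_MPIDense()
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/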
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
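/*
   Example (illustrative sketch): passing a user-provided array instead of
   NULL; the array must be large enough for (local rows) x (global columns)
   entries stored column by column, and must outlive the matrix. The sizes
   here are hypothetical.

     Mat            A;
     PetscInt       m = 2,N = 10;     local rows and global columns, for illustration
     PetscScalar    storage[2*10];
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,storage);CHKERRQ(ierr);
*/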
#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
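/*
   Example (illustrative sketch): the convenience routine above picks
   MATSEQDENSE or MATMPIDENSE based on the communicator size.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);CHKERRQ(ierr);
     ... set values, assemble, use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/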
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot directly read into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' data and ship it out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot directly read into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
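/*
   Example (illustrative sketch): loading a dense matrix written in NATIVE
   binary format; MatLoad() dispatches to MatLoad_MPIDense() below. The file
   name "dense.dat" is only for illustration.

     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/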
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) ourlens[i] -= offlens[i];

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) ourlens[i] += offlens[i];

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' data and ship it out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}