/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that stores the locally owned part of the operator. For a sequential dense matrix
   it returns the matrix itself.

   Input Parameter:
.  A - the sequential or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
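/*
   A minimal usage sketch (not part of the PETSc sources, illustrative only): obtain the
   inner MATSEQDENSE that holds the locally owned rows and operate on it directly, here
   computing the Frobenius norm of the local block. On a single process the routine simply
   returns the matrix itself. Error checking (ierr/CHKERRQ) is omitted.

      Mat       A,Aloc;
      PetscReal nrm;

      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDenseGetLocalMatrix(A,&Aloc);
      MatNorm(Aloc,NORM_FROBENIUS,&nrm);
      MatDestroy(&A);
*/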
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
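/*
   A minimal usage sketch (not part of the PETSc sources, illustrative only) of the
   insert/assembly path implemented above: entries destined for rows owned by another
   process are cached in mat->stash by MatSetValues_MPIDense() and are only communicated
   and inserted into the owning process's local MATSEQDENSE during the
   MatAssemblyBegin()/MatAssemblyEnd() pair. The sizes and indices below are arbitrary and
   error checking is omitted.

      Mat         A;
      PetscInt    row = 0,col = 0;
      PetscScalar v   = 1.0;

      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,NULL,&A);
      MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(&A);
*/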
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
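/*
   A minimal usage sketch (not part of the PETSc sources, illustrative only): create a
   parallel dense matrix with MatCreateDense() and fill the locally owned block directly
   through MatDenseGetArray(). The array is the column-major storage of the local rows for
   all global columns; the indexing below assumes the default leading dimension, which
   equals the number of local rows when PETSc allocates the storage (data = NULL). The
   size N is arbitrary and error checking is omitted.

      Mat         A;
      PetscScalar *a;
      PetscInt    i,j,m,rstart,rend,N = 8;

      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      m = rend - rstart;
      MatDenseGetArray(A,&a);
      for (j=0; j<N; j++) {
        for (i=0; i<m; i++) a[i + j*m] = (PetscScalar)(rstart + i + j);
      }
      MatDenseRestoreArray(A,&a);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(&A);
*/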
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1((nz+1),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) ourlens[i] -= offlens[i];

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) ourlens[i] += offlens[i];

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
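/*
   A minimal usage sketch (not part of the PETSc sources, illustrative only) of the binary
   I/O path implemented by MatView_MPIDense_Binary() and MatLoad_MPIDense(): a parallel
   dense matrix can only be stored when the viewer format is PETSC_VIEWER_NATIVE, and it
   can then be read back into a MATMPIDENSE with MatLoad(). The file name is arbitrary,
   A is assumed to be an assembled MATMPIDENSE, and error checking is omitted.

      Mat         B;
      PetscViewer viewer;

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);
      PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);
      MatView(A,viewer);
      PetscViewerDestroy(&viewer);

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);
      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetType(B,MATMPIDENSE);
      MatLoad(B,viewer);
      PetscViewerDestroy(&viewer);
      MatDestroy(&B);
*/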