1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) { 34 *B = mat->A; 35 } else { 36 *B = A; 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatGetRow_MPIDense" 43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 48 49 PetscFunctionBegin; 50 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 51 lrow = row - rstart; 52 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatRestoreRow_MPIDense" 58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 59 { 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 64 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 71 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 72 { 73 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 74 PetscErrorCode ierr; 75 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 76 PetscScalar *array; 77 MPI_Comm comm; 78 Mat B; 79 80 PetscFunctionBegin; 81 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 82 83 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 84 if (!B) { 85 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 86 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 87 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 88 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 89 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 90 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 91 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 93 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 94 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 95 *a = B; 96 ierr = MatDestroy(&B);CHKERRQ(ierr); 97 } else { 98 *a = B; 99 } 100 PetscFunctionReturn(0); 101 } 102 EXTERN_C_END 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatSetValues_MPIDense" 106 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 107 { 108 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 109 PetscErrorCode ierr; 110 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 111 PetscBool roworiented = A->roworiented; 112 113 PetscFunctionBegin; 114 if (v) PetscValidScalarPointer(v,6); 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 mat->assembled = PETSC_FALSE; 132 if (roworiented) { 133 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 134 } else { 135 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 136 } 137 } 138 } 139 } 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatGetValues_MPIDense" 145 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 146 { 147 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 148 PetscErrorCode ierr; 149 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 150 151 PetscFunctionBegin; 152 for (i=0; i<m; i++) { 153 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 154 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 155 if (idxm[i] >= rstart && idxm[i] < rend) { 156 row = idxm[i] - rstart; 157 for (j=0; j<n; j++) { 158 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 159 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 161 } 162 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 163 } 164 PetscFunctionReturn(0); 165 } 166 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatDenseGetArray_MPIDense" 169 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 170 { 171 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 172 PetscErrorCode ierr; 173 174 PetscFunctionBegin; 175 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 181 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 182 { 183 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 184 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 185 PetscErrorCode ierr; 186 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 187 const PetscInt *irow,*icol; 188 PetscScalar *av,*bv,*v = lmat->v; 189 Mat newmat; 190 IS iscol_local; 191 192 PetscFunctionBegin; 193 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 194 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 195 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 196 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 198 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 199 200 /* No parallel redistribution currently supported! Should really check each index set 201 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 202 original matrix! */ 203 204 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 205 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 206 207 /* Check submatrix call */ 208 if (scall == MAT_REUSE_MATRIX) { 209 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 210 /* Really need to test rows and column sizes! */ 211 newmat = *B; 212 } else { 213 /* Create and fill new matrix */ 214 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 215 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 216 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 217 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 218 } 219 220 /* Now extract the data pointers and do the copy, column at a time */ 221 newmatd = (Mat_MPIDense*)newmat->data; 222 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 223 224 for (i=0; i<Ncols; i++) { 225 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 226 for (j=0; j<nrows; j++) { 227 *bv++ = av[irow[j] - rstart]; 228 } 229 } 230 231 /* Assemble the matrices so that the correct flags are set */ 232 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 234 235 /* Free work space */ 236 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 237 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 238 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 239 *B = newmat; 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 245 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 246 { 247 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 257 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 258 { 259 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 260 MPI_Comm comm = ((PetscObject)mat)->comm; 261 PetscErrorCode ierr; 262 PetscInt nstash,reallocs; 263 InsertMode addv; 264 265 PetscFunctionBegin; 266 /* make sure all processors are either in INSERTMODE or ADDMODE */ 267 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 268 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 269 mat->insertmode = addv; /* in case this processor had no cache */ 270 271 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 272 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 273 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 279 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 280 { 281 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 282 PetscErrorCode ierr; 283 PetscInt i,*row,*col,flg,j,rstart,ncols; 284 PetscMPIInt n; 285 PetscScalar *val; 286 InsertMode addv=mat->insertmode; 287 288 PetscFunctionBegin; 289 /* wait on receives */ 290 while (1) { 291 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 292 if (!flg) break; 293 294 for (i=0; i<n;) { 295 /* Now identify the consecutive vals belonging to the same row */ 296 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 297 if (j < n) ncols = j-i; 298 else ncols = n-i; 299 /* Now assemble all these values with a single function call */ 300 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 301 i = j; 302 } 303 } 304 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 305 306 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 307 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 308 309 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 310 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatZeroEntries_MPIDense" 317 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 318 { 319 PetscErrorCode ierr; 320 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 321 322 PetscFunctionBegin; 323 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 /* the code does not do the diagonal entries correctly unless the 328 matrix is square and the column and row owerships are identical. 329 This is a BUG. The only way to fix it seems to be to access 330 mdn->A and mdn->B directly and not through the MatZeroRows() 331 routine. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatZeroRows_MPIDense" 335 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 336 { 337 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 338 PetscErrorCode ierr; 339 PetscInt i,*owners = A->rmap->range; 340 PetscInt *nprocs,j,idx,nsends; 341 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 342 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 343 PetscInt *lens,*lrows,*values; 344 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 345 MPI_Comm comm = ((PetscObject)A)->comm; 346 MPI_Request *send_waits,*recv_waits; 347 MPI_Status recv_status,*send_status; 348 PetscBool found; 349 const PetscScalar *xx; 350 PetscScalar *bb; 351 352 PetscFunctionBegin; 353 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices"); 354 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 355 /* first count number of contributors to each processor */ 356 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 357 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 358 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 359 for (i=0; i<N; i++) { 360 idx = rows[i]; 361 found = PETSC_FALSE; 362 for (j=0; j<size; j++) { 363 if (idx >= owners[j] && idx < owners[j+1]) { 364 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 365 } 366 } 367 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 368 } 369 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 370 371 /* inform other processors of number of messages and max length*/ 372 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 373 374 /* post receives: */ 375 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 376 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 377 for (i=0; i<nrecvs; i++) { 378 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 379 } 380 381 /* do sends: 382 1) starts[i] gives the starting index in svalues for stuff going to 383 the ith processor 384 */ 385 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 386 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 387 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 388 starts[0] = 0; 389 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 390 for (i=0; i<N; i++) { 391 svalues[starts[owner[i]]++] = rows[i]; 392 } 393 394 starts[0] = 0; 395 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 396 count = 0; 397 for (i=0; i<size; i++) { 398 if (nprocs[2*i+1]) { 399 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 400 } 401 } 402 ierr = PetscFree(starts);CHKERRQ(ierr); 403 404 base = owners[rank]; 405 406 /* wait on receives */ 407 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 408 count = nrecvs; 409 slen = 0; 410 while (count) { 411 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 412 /* unpack receives into our local space */ 413 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 414 source[imdex] = recv_status.MPI_SOURCE; 415 lens[imdex] = n; 416 slen += n; 417 count--; 418 } 419 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 420 421 /* move the data into the send scatter */ 422 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 423 count = 0; 424 for (i=0; i<nrecvs; i++) { 425 values = rvalues + i*nmax; 426 for (j=0; j<lens[i]; j++) { 427 lrows[count++] = values[j] - base; 428 } 429 } 430 ierr = PetscFree(rvalues);CHKERRQ(ierr); 431 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 432 ierr = PetscFree(owner);CHKERRQ(ierr); 433 ierr = PetscFree(nprocs);CHKERRQ(ierr); 434 435 /* fix right hand side if needed */ 436 if (x && b) { 437 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 438 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 439 for (i=0; i<slen; i++) { 440 bb[lrows[i]] = diag*xx[lrows[i]]; 441 } 442 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 443 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 444 } 445 446 /* actually zap the local rows */ 447 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 448 if (diag != 0.0) { 449 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 450 PetscInt m = ll->lda, i; 451 452 for (i=0; i<slen; i++) { 453 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 454 } 455 } 456 ierr = PetscFree(lrows);CHKERRQ(ierr); 457 458 /* wait on sends */ 459 if (nsends) { 460 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 461 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 462 ierr = PetscFree(send_status);CHKERRQ(ierr); 463 } 464 ierr = PetscFree(send_waits);CHKERRQ(ierr); 465 ierr = PetscFree(svalues);CHKERRQ(ierr); 466 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMult_MPIDense" 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultAdd_MPIDense" 486 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 487 { 488 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 489 PetscErrorCode ierr; 490 491 PetscFunctionBegin; 492 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 494 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatMultTranspose_MPIDense" 500 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 501 { 502 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 503 PetscErrorCode ierr; 504 PetscScalar zero = 0.0; 505 506 PetscFunctionBegin; 507 ierr = VecSet(yy,zero);CHKERRQ(ierr); 508 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 509 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 #undef __FUNCT__ 515 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 516 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 517 { 518 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 519 PetscErrorCode ierr; 520 521 PetscFunctionBegin; 522 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 523 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 524 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "MatGetDiagonal_MPIDense" 531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 532 { 533 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 534 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 535 PetscErrorCode ierr; 536 PetscInt len,i,n,m = A->rmap->n,radd; 537 PetscScalar *x,zero = 0.0; 538 539 PetscFunctionBegin; 540 ierr = VecSet(v,zero);CHKERRQ(ierr); 541 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 542 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 543 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 544 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 545 radd = A->rmap->rstart*m; 546 for (i=0; i<len; i++) { 547 x[i] = aloc->v[radd + i*m + i]; 548 } 549 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatDestroy_MPIDense" 555 PetscErrorCode MatDestroy_MPIDense(Mat mat) 556 { 557 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 558 PetscErrorCode ierr; 559 560 PetscFunctionBegin; 561 #if defined(PETSC_USE_LOG) 562 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 563 #endif 564 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 565 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 566 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 567 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 568 569 ierr = PetscFree(mat->data);CHKERRQ(ierr); 570 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 571 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatView_MPIDense_Binary" 581 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 582 { 583 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 584 PetscErrorCode ierr; 585 PetscViewerFormat format; 586 int fd; 587 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 588 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 589 PetscScalar *work,*v,*vv; 590 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 591 592 PetscFunctionBegin; 593 if (mdn->size == 1) { 594 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 595 } else { 596 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 597 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 598 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 599 600 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 601 if (format == PETSC_VIEWER_NATIVE) { 602 603 if (!rank) { 604 /* store the matrix as a dense matrix */ 605 header[0] = MAT_FILE_CLASSID; 606 header[1] = mat->rmap->N; 607 header[2] = N; 608 header[3] = MATRIX_BINARY_FORMAT_DENSE; 609 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 610 611 /* get largest work array needed for transposing array */ 612 mmax = mat->rmap->n; 613 for (i=1; i<size; i++) { 614 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 615 } 616 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 617 618 /* write out local array, by rows */ 619 m = mat->rmap->n; 620 v = a->v; 621 for (j=0; j<N; j++) { 622 for (i=0; i<m; i++) { 623 work[j + i*N] = *v++; 624 } 625 } 626 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 627 /* get largest work array to receive messages from other processes, excludes process zero */ 628 mmax = 0; 629 for (i=1; i<size; i++) { 630 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 631 } 632 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 633 for (k = 1; k < size; k++) { 634 v = vv; 635 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 636 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 637 638 for (j = 0; j < N; j++) { 639 for (i = 0; i < m; i++) { 640 work[j + i*N] = *v++; 641 } 642 } 643 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 644 } 645 ierr = PetscFree(work);CHKERRQ(ierr); 646 ierr = PetscFree(vv);CHKERRQ(ierr); 647 } else { 648 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 #undef __FUNCT__ 656 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 657 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 658 { 659 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 660 PetscErrorCode ierr; 661 PetscMPIInt size = mdn->size,rank = mdn->rank; 662 PetscViewerType vtype; 663 PetscBool iascii,isdraw; 664 PetscViewer sviewer; 665 PetscViewerFormat format; 666 667 PetscFunctionBegin; 668 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 669 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 670 if (iascii) { 671 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 672 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 673 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 674 MatInfo info; 675 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 676 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 677 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 678 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 679 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 680 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } else if (format == PETSC_VIEWER_ASCII_INFO) { 683 PetscFunctionReturn(0); 684 } 685 } else if (isdraw) { 686 PetscDraw draw; 687 PetscBool isnull; 688 689 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 690 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 691 if (isnull) PetscFunctionReturn(0); 692 } 693 694 if (size == 1) { 695 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 696 } else { 697 /* assemble the entire matrix onto first processor. */ 698 Mat A; 699 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 700 PetscInt *cols; 701 PetscScalar *vals; 702 703 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 704 if (!rank) { 705 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 706 } else { 707 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 708 } 709 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 710 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 711 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr); 712 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 713 714 /* Copy the matrix ... This isn't the most efficient means, 715 but it's quick for now */ 716 A->insertmode = INSERT_VALUES; 717 row = mat->rmap->rstart; m = mdn->A->rmap->n; 718 for (i=0; i<m; i++) { 719 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 720 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 721 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 722 row++; 723 } 724 725 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 726 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 727 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 728 if (!rank) { 729 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 730 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 731 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 732 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 733 } 734 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 735 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 736 ierr = MatDestroy(&A);CHKERRQ(ierr); 737 } 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatView_MPIDense" 743 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 744 { 745 PetscErrorCode ierr; 746 PetscBool iascii,isbinary,isdraw,issocket; 747 748 PetscFunctionBegin; 749 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 752 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 753 754 if (iascii || issocket || isdraw) { 755 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 756 } else if (isbinary) { 757 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 758 } 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatGetInfo_MPIDense" 764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 765 { 766 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 767 Mat mdn = mat->A; 768 PetscErrorCode ierr; 769 PetscReal isend[5],irecv[5]; 770 771 PetscFunctionBegin; 772 info->block_size = 1.0; 773 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 774 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 775 isend[3] = info->memory; isend[4] = info->mallocs; 776 if (flag == MAT_LOCAL) { 777 info->nz_used = isend[0]; 778 info->nz_allocated = isend[1]; 779 info->nz_unneeded = isend[2]; 780 info->memory = isend[3]; 781 info->mallocs = isend[4]; 782 } else if (flag == MAT_GLOBAL_MAX) { 783 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 784 info->nz_used = irecv[0]; 785 info->nz_allocated = irecv[1]; 786 info->nz_unneeded = irecv[2]; 787 info->memory = irecv[3]; 788 info->mallocs = irecv[4]; 789 } else if (flag == MAT_GLOBAL_SUM) { 790 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 791 info->nz_used = irecv[0]; 792 info->nz_allocated = irecv[1]; 793 info->nz_unneeded = irecv[2]; 794 info->memory = irecv[3]; 795 info->mallocs = irecv[4]; 796 } 797 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 798 info->fill_ratio_needed = 0; 799 info->factor_mallocs = 0; 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatSetOption_MPIDense" 805 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 806 { 807 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 808 PetscErrorCode ierr; 809 810 PetscFunctionBegin; 811 switch (op) { 812 case MAT_NEW_NONZERO_LOCATIONS: 813 case MAT_NEW_NONZERO_LOCATION_ERR: 814 case MAT_NEW_NONZERO_ALLOCATION_ERR: 815 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 816 break; 817 case MAT_ROW_ORIENTED: 818 a->roworiented = flg; 819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 820 break; 821 case MAT_NEW_DIAGONALS: 822 case MAT_KEEP_NONZERO_PATTERN: 823 case MAT_USE_HASH_TABLE: 824 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 825 break; 826 case MAT_IGNORE_OFF_PROC_ENTRIES: 827 a->donotstash = flg; 828 break; 829 case MAT_SYMMETRIC: 830 case MAT_STRUCTURALLY_SYMMETRIC: 831 case MAT_HERMITIAN: 832 case MAT_SYMMETRY_ETERNAL: 833 case MAT_IGNORE_LOWER_TRIANGULAR: 834 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 835 break; 836 default: 837 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 838 } 839 PetscFunctionReturn(0); 840 } 841 842 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatDiagonalScale_MPIDense" 845 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 846 { 847 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 848 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 849 PetscScalar *l,*r,x,*v; 850 PetscErrorCode ierr; 851 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 852 853 PetscFunctionBegin; 854 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 855 if (ll) { 856 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 857 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 858 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 859 for (i=0; i<m; i++) { 860 x = l[i]; 861 v = mat->v + i; 862 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 863 } 864 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 865 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 866 } 867 if (rr) { 868 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 869 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 870 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 873 for (i=0; i<n; i++) { 874 x = r[i]; 875 v = mat->v + i*m; 876 for (j=0; j<m; j++) { (*v++) *= x;} 877 } 878 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 879 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 880 } 881 PetscFunctionReturn(0); 882 } 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatNorm_MPIDense" 886 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 887 { 888 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 889 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 890 PetscErrorCode ierr; 891 PetscInt i,j; 892 PetscReal sum = 0.0; 893 PetscScalar *v = mat->v; 894 895 PetscFunctionBegin; 896 if (mdn->size == 1) { 897 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 898 } else { 899 if (type == NORM_FROBENIUS) { 900 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 901 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 902 } 903 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 904 *nrm = PetscSqrtReal(*nrm); 905 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 906 } else if (type == NORM_1) { 907 PetscReal *tmp,*tmp2; 908 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 909 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 910 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 911 *nrm = 0.0; 912 v = mat->v; 913 for (j=0; j<mdn->A->cmap->n; j++) { 914 for (i=0; i<mdn->A->rmap->n; i++) { 915 tmp[j] += PetscAbsScalar(*v); v++; 916 } 917 } 918 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 919 for (j=0; j<A->cmap->N; j++) { 920 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 921 } 922 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 923 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 924 } else if (type == NORM_INFINITY) { /* max row norm */ 925 PetscReal ntemp; 926 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 927 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 928 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatTranspose_MPIDense" 935 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 936 { 937 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 938 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 939 Mat B; 940 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 941 PetscErrorCode ierr; 942 PetscInt j,i; 943 PetscScalar *v; 944 945 PetscFunctionBegin; 946 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 947 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 948 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 949 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 950 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 951 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 952 } else { 953 B = *matout; 954 } 955 956 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 957 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 958 for (i=0; i<m; i++) rwork[i] = rstart + i; 959 for (j=0; j<n; j++) { 960 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 961 v += m; 962 } 963 ierr = PetscFree(rwork);CHKERRQ(ierr); 964 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 967 *matout = B; 968 } else { 969 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 970 } 971 PetscFunctionReturn(0); 972 } 973 974 975 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 976 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatSetUp_MPIDense" 980 PetscErrorCode MatSetUp_MPIDense(Mat A) 981 { 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatAXPY_MPIDense" 991 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 992 { 993 PetscErrorCode ierr; 994 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 995 996 PetscFunctionBegin; 997 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatConjugate_MPIDense" 1003 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1004 { 1005 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatRealPart_MPIDense" 1015 PetscErrorCode MatRealPart_MPIDense(Mat A) 1016 { 1017 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1027 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1028 { 1029 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1030 PetscErrorCode ierr; 1031 1032 PetscFunctionBegin; 1033 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1040 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1041 { 1042 PetscErrorCode ierr; 1043 PetscInt i,n; 1044 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1045 PetscReal *work; 1046 1047 PetscFunctionBegin; 1048 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 1049 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1050 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1051 if (type == NORM_2) { 1052 for (i=0; i<n; i++) work[i] *= work[i]; 1053 } 1054 if (type == NORM_INFINITY) { 1055 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1056 } else { 1057 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1058 } 1059 ierr = PetscFree(work);CHKERRQ(ierr); 1060 if (type == NORM_2) { 1061 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatSetRandom_MPIDense" 1068 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1069 { 1070 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1071 PetscErrorCode ierr; 1072 PetscScalar *a; 1073 PetscInt m,n,i; 1074 1075 PetscFunctionBegin; 1076 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1077 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1078 for (i=0; i<m*n; i++) { 1079 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1080 } 1081 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 /* -------------------------------------------------------------------*/ 1086 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1087 MatGetRow_MPIDense, 1088 MatRestoreRow_MPIDense, 1089 MatMult_MPIDense, 1090 /* 4*/ MatMultAdd_MPIDense, 1091 MatMultTranspose_MPIDense, 1092 MatMultTransposeAdd_MPIDense, 1093 0, 1094 0, 1095 0, 1096 /* 10*/ 0, 1097 0, 1098 0, 1099 0, 1100 MatTranspose_MPIDense, 1101 /* 15*/ MatGetInfo_MPIDense, 1102 MatEqual_MPIDense, 1103 MatGetDiagonal_MPIDense, 1104 MatDiagonalScale_MPIDense, 1105 MatNorm_MPIDense, 1106 /* 20*/ MatAssemblyBegin_MPIDense, 1107 MatAssemblyEnd_MPIDense, 1108 MatSetOption_MPIDense, 1109 MatZeroEntries_MPIDense, 1110 /* 24*/ MatZeroRows_MPIDense, 1111 0, 1112 0, 1113 0, 1114 0, 1115 /* 29*/ MatSetUp_MPIDense, 1116 0, 1117 0, 1118 0, 1119 0, 1120 /* 34*/ MatDuplicate_MPIDense, 1121 0, 1122 0, 1123 0, 1124 0, 1125 /* 39*/ MatAXPY_MPIDense, 1126 MatGetSubMatrices_MPIDense, 1127 0, 1128 MatGetValues_MPIDense, 1129 0, 1130 /* 44*/ 0, 1131 MatScale_MPIDense, 1132 0, 1133 0, 1134 0, 1135 /* 49*/ MatSetRandom_MPIDense, 1136 0, 1137 0, 1138 0, 1139 0, 1140 /* 54*/ 0, 1141 0, 1142 0, 1143 0, 1144 0, 1145 /* 59*/ MatGetSubMatrix_MPIDense, 1146 MatDestroy_MPIDense, 1147 MatView_MPIDense, 1148 0, 1149 0, 1150 /* 64*/ 0, 1151 0, 1152 0, 1153 0, 1154 0, 1155 /* 69*/ 0, 1156 0, 1157 0, 1158 0, 1159 0, 1160 /* 74*/ 0, 1161 0, 1162 0, 1163 0, 1164 0, 1165 /* 79*/ 0, 1166 0, 1167 0, 1168 0, 1169 /* 83*/ MatLoad_MPIDense, 1170 0, 1171 0, 1172 0, 1173 0, 1174 0, 1175 /* 89*/ 1176 0, 1177 0, 1178 0, 1179 0, 1180 0, 1181 /* 94*/ 0, 1182 0, 1183 0, 1184 0, 1185 0, 1186 /* 99*/ 0, 1187 0, 1188 0, 1189 MatConjugate_MPIDense, 1190 0, 1191 /*104*/ 0, 1192 MatRealPart_MPIDense, 1193 MatImaginaryPart_MPIDense, 1194 0, 1195 0, 1196 /*109*/ 0, 1197 0, 1198 0, 1199 0, 1200 0, 1201 /*114*/ 0, 1202 0, 1203 0, 1204 0, 1205 0, 1206 /*119*/ 0, 1207 0, 1208 0, 1209 0, 1210 0, 1211 /*124*/ 0, 1212 MatGetColumnNorms_MPIDense 1213 }; 1214 1215 EXTERN_C_BEGIN 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1218 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1219 { 1220 Mat_MPIDense *a; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 mat->preallocated = PETSC_TRUE; 1225 /* Note: For now, when data is specified above, this assumes the user correctly 1226 allocates the local dense storage space. We should add error checking. */ 1227 1228 a = (Mat_MPIDense*)mat->data; 1229 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1230 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1231 a->nvec = mat->cmap->n; 1232 1233 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1234 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1235 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1236 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1237 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 EXTERN_C_END 1241 1242 EXTERN_C_BEGIN 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatCreate_MPIDense" 1245 PetscErrorCode MatCreate_MPIDense(Mat mat) 1246 { 1247 Mat_MPIDense *a; 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1252 mat->data = (void*)a; 1253 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1254 1255 mat->insertmode = NOT_SET_VALUES; 1256 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1257 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1258 1259 /* build cache for off array entries formed */ 1260 a->donotstash = PETSC_FALSE; 1261 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1262 1263 /* stuff used for matrix vector multiply */ 1264 a->lvec = 0; 1265 a->Mvctx = 0; 1266 a->roworiented = PETSC_TRUE; 1267 1268 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1270 1271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1272 "MatGetDiagonalBlock_MPIDense", 1273 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1275 "MatMPIDenseSetPreallocation_MPIDense", 1276 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1278 "MatMatMult_MPIAIJ_MPIDense", 1279 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1281 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1282 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1284 "MatMatMultNumeric_MPIAIJ_MPIDense", 1285 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1286 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1287 1288 PetscFunctionReturn(0); 1289 } 1290 EXTERN_C_END 1291 1292 /*MC 1293 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1294 1295 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1296 and MATMPIDENSE otherwise. 1297 1298 Options Database Keys: 1299 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1300 1301 Level: beginner 1302 1303 1304 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1305 M*/ 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1309 /*@C 1310 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1311 1312 Not collective 1313 1314 Input Parameters: 1315 . A - the matrix 1316 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1317 to control all matrix memory allocation. 1318 1319 Notes: 1320 The dense format is fully compatible with standard Fortran 77 1321 storage by columns. 1322 1323 The data input variable is intended primarily for Fortran programmers 1324 who wish to allocate their own matrix memory space. Most users should 1325 set data=PETSC_NULL. 1326 1327 Level: intermediate 1328 1329 .keywords: matrix,dense, parallel 1330 1331 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1332 @*/ 1333 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1334 { 1335 PetscErrorCode ierr; 1336 1337 PetscFunctionBegin; 1338 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCreateDense" 1344 /*@C 1345 MatCreateDense - Creates a parallel matrix in dense format. 1346 1347 Collective on MPI_Comm 1348 1349 Input Parameters: 1350 + comm - MPI communicator 1351 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1352 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1353 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1354 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1355 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1356 to control all matrix memory allocation. 1357 1358 Output Parameter: 1359 . A - the matrix 1360 1361 Notes: 1362 The dense format is fully compatible with standard Fortran 77 1363 storage by columns. 1364 1365 The data input variable is intended primarily for Fortran programmers 1366 who wish to allocate their own matrix memory space. Most users should 1367 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1368 1369 The user MUST specify either the local or global matrix dimensions 1370 (possibly both). 1371 1372 Level: intermediate 1373 1374 .keywords: matrix,dense, parallel 1375 1376 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1377 @*/ 1378 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1379 { 1380 PetscErrorCode ierr; 1381 PetscMPIInt size; 1382 1383 PetscFunctionBegin; 1384 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1385 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1386 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1387 if (size > 1) { 1388 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1389 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1390 } else { 1391 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1392 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatDuplicate_MPIDense" 1399 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1400 { 1401 Mat mat; 1402 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 *newmat = 0; 1407 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1408 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1409 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1410 a = (Mat_MPIDense*)mat->data; 1411 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1412 1413 mat->factortype = A->factortype; 1414 mat->assembled = PETSC_TRUE; 1415 mat->preallocated = PETSC_TRUE; 1416 1417 a->size = oldmat->size; 1418 a->rank = oldmat->rank; 1419 mat->insertmode = NOT_SET_VALUES; 1420 a->nvec = oldmat->nvec; 1421 a->donotstash = oldmat->donotstash; 1422 1423 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1424 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1425 1426 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1427 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1428 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1429 1430 *newmat = mat; 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1436 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1437 { 1438 PetscErrorCode ierr; 1439 PetscMPIInt rank,size; 1440 PetscInt *rowners,i,m,nz,j; 1441 PetscScalar *array,*vals,*vals_ptr; 1442 1443 PetscFunctionBegin; 1444 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1445 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1446 1447 /* determine ownership of all rows */ 1448 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1449 else m = newmat->rmap->n; 1450 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1451 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1452 rowners[0] = 0; 1453 for (i=2; i<=size; i++) { 1454 rowners[i] += rowners[i-1]; 1455 } 1456 1457 if (!sizesset) { 1458 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1459 } 1460 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1461 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1462 1463 if (!rank) { 1464 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1465 1466 /* read in my part of the matrix numerical values */ 1467 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1468 1469 /* insert into matrix-by row (this is why cannot directly read into array */ 1470 vals_ptr = vals; 1471 for (i=0; i<m; i++) { 1472 for (j=0; j<N; j++) { 1473 array[i + j*m] = *vals_ptr++; 1474 } 1475 } 1476 1477 /* read in other processors and ship out */ 1478 for (i=1; i<size; i++) { 1479 nz = (rowners[i+1] - rowners[i])*N; 1480 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1481 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1482 } 1483 } else { 1484 /* receive numeric values */ 1485 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1486 1487 /* receive message of values*/ 1488 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1489 1490 /* insert into matrix-by row (this is why cannot directly read into array */ 1491 vals_ptr = vals; 1492 for (i=0; i<m; i++) { 1493 for (j=0; j<N; j++) { 1494 array[i + j*m] = *vals_ptr++; 1495 } 1496 } 1497 } 1498 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1499 ierr = PetscFree(rowners);CHKERRQ(ierr); 1500 ierr = PetscFree(vals);CHKERRQ(ierr); 1501 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1502 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatLoad_MPIDense" 1508 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1509 { 1510 PetscScalar *vals,*svals; 1511 MPI_Comm comm = ((PetscObject)viewer)->comm; 1512 MPI_Status status; 1513 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1514 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1515 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1516 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1517 int fd; 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1522 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1523 if (!rank) { 1524 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1525 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1526 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1527 } 1528 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1529 1530 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1531 M = header[1]; N = header[2]; nz = header[3]; 1532 1533 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1534 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1535 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1536 1537 /* If global sizes are set, check if they are consistent with that given in the file */ 1538 if (sizesset) { 1539 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1540 } 1541 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1542 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1543 1544 /* 1545 Handle case where matrix is stored on disk as a dense matrix 1546 */ 1547 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1548 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 /* determine ownership of all rows */ 1553 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1554 else m = PetscMPIIntCast(newmat->rmap->n); 1555 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1556 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1557 rowners[0] = 0; 1558 for (i=2; i<=size; i++) { 1559 rowners[i] += rowners[i-1]; 1560 } 1561 rstart = rowners[rank]; 1562 rend = rowners[rank+1]; 1563 1564 /* distribute row lengths to all processors */ 1565 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1568 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1569 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1570 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1571 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1572 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1573 } else { 1574 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1575 } 1576 1577 if (!rank) { 1578 /* calculate the number of nonzeros on each processor */ 1579 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1580 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1581 for (i=0; i<size; i++) { 1582 for (j=rowners[i]; j< rowners[i+1]; j++) { 1583 procsnz[i] += rowlengths[j]; 1584 } 1585 } 1586 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1587 1588 /* determine max buffer needed and allocate it */ 1589 maxnz = 0; 1590 for (i=0; i<size; i++) { 1591 maxnz = PetscMax(maxnz,procsnz[i]); 1592 } 1593 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1594 1595 /* read in my part of the matrix column indices */ 1596 nz = procsnz[0]; 1597 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1598 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1599 1600 /* read in every one elses and ship off */ 1601 for (i=1; i<size; i++) { 1602 nz = procsnz[i]; 1603 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1604 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1605 } 1606 ierr = PetscFree(cols);CHKERRQ(ierr); 1607 } else { 1608 /* determine buffer space needed for message */ 1609 nz = 0; 1610 for (i=0; i<m; i++) { 1611 nz += ourlens[i]; 1612 } 1613 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1614 1615 /* receive message of column indices*/ 1616 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1617 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1618 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1619 } 1620 1621 /* loop over local rows, determining number of off diagonal entries */ 1622 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1623 jj = 0; 1624 for (i=0; i<m; i++) { 1625 for (j=0; j<ourlens[i]; j++) { 1626 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1627 jj++; 1628 } 1629 } 1630 1631 /* create our matrix */ 1632 for (i=0; i<m; i++) { 1633 ourlens[i] -= offlens[i]; 1634 } 1635 1636 if (!sizesset) { 1637 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1638 } 1639 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1640 for (i=0; i<m; i++) { 1641 ourlens[i] += offlens[i]; 1642 } 1643 1644 if (!rank) { 1645 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1646 1647 /* read in my part of the matrix numerical values */ 1648 nz = procsnz[0]; 1649 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1650 1651 /* insert into matrix */ 1652 jj = rstart; 1653 smycols = mycols; 1654 svals = vals; 1655 for (i=0; i<m; i++) { 1656 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1657 smycols += ourlens[i]; 1658 svals += ourlens[i]; 1659 jj++; 1660 } 1661 1662 /* read in other processors and ship out */ 1663 for (i=1; i<size; i++) { 1664 nz = procsnz[i]; 1665 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1666 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1667 } 1668 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1669 } else { 1670 /* receive numeric values */ 1671 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1672 1673 /* receive message of values*/ 1674 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1675 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1676 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1677 1678 /* insert into matrix */ 1679 jj = rstart; 1680 smycols = mycols; 1681 svals = vals; 1682 for (i=0; i<m; i++) { 1683 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1684 smycols += ourlens[i]; 1685 svals += ourlens[i]; 1686 jj++; 1687 } 1688 } 1689 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 1690 ierr = PetscFree(vals);CHKERRQ(ierr); 1691 ierr = PetscFree(mycols);CHKERRQ(ierr); 1692 ierr = PetscFree(rowners);CHKERRQ(ierr); 1693 1694 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1695 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatEqual_MPIDense" 1701 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1702 { 1703 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1704 Mat a,b; 1705 PetscBool flg; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 a = matA->A; 1710 b = matB->A; 1711 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1712 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1713 PetscFunctionReturn(0); 1714 } 1715 1716