/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
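/*
  A minimal caller-side sketch for MatDenseGetLocalMatrix() (hypothetical user
  code, assuming A is an assembled MATMPIDENSE matrix): the returned Aloc is
  the rank-local MATSEQDENSE object itself, not a copy, so it must not be
  destroyed by the caller.

    Mat      Aloc;
    PetscInt mloc,Nglob;
    ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
    ierr = MatGetSize(Aloc,&mloc,&Nglob);CHKERRQ(ierr);  // local rows by ALL global columns
*/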
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
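/*
  A short sketch of how the stash path in MatSetValues_MPIDense() above is
  exercised (hypothetical caller code; grow, gcol and val are placeholders):
  entries destined for rows owned by another rank are cached locally and only
  communicated during assembly.

    ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);  // grow may be off-process
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           // ships stashed entries
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/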
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
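/*
  Summary of the communication scheme in MatZeroRows_MPIDense() below: each
  rank bins the requested global rows by owning process, learns the number and
  maximum size of incoming messages via PetscMaxSum(), exchanges row indices
  with nonblocking MPI_Isend()/MPI_Irecv(), and then zeros its own rows
  locally, optionally placing diag on the diagonal and fixing the right-hand
  side b from x.
*/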
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
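/*
  A small usage sketch for the multiply routines above (hypothetical caller
  code): x must be distributed like the columns of mat and y like its rows,
  which MatCreateVecs() guarantees.

    Vec x,y;
    ierr = MatCreateVecs(mat,&x,&y);CHKERRQ(ierr);
    ierr = VecSet(x,1.0);CHKERRQ(ierr);
    ierr = MatMult(mat,x,y);CHKERRQ(ierr); // scatters x into lvec, then does the local product
*/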
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
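/*
  A one-line sketch for MatSetOption_MPIDense() below (hypothetical caller
  code): MAT_IGNORE_OFF_PROC_ENTRIES sets donotstash, so subsequent
  off-process MatSetValues() calls are dropped instead of stashed and
  communicated.

    ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/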
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
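/*
  Note on MatDiagonalScale_MPIDense() above: the left vector ll matches the
  locally owned rows and is applied directly, while the right vector rr is
  distributed across ranks and must first be gathered into lvec through the
  Mvctx scatter; the net effect is A <- diag(ll)*A*diag(rr).
*/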
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr); /* restore (not re-get) the read-only array */
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ 0,
                                        0,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
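/*
  A minimal sketch for the column accessors above (hypothetical caller code):
  the pointer covers the locally owned rows of global column col and must be
  restored before the matrix is used again.

    PetscScalar *colv;
    PetscInt    i,mloc;
    ierr = MatGetLocalSize(A,&mloc,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetColumn(A,col,&colv);CHKERRQ(ierr);
    for (i=0; i<mloc; i++) colv[i] = 0.0; // e.g. zero the local part of the column
    ierr = MatDenseRestoreColumn(A,&colv);CHKERRQ(ierr);
*/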
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
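/*
  A minimal sketch pairing MatDensePlaceArray() with MatDenseResetArray()
  (hypothetical caller code; myarray is a user-owned buffer assumed to hold
  the local rows in column-major order):

    ierr = MatDensePlaceArray(mat,myarray);CHKERRQ(ierr);
    ierr = MatMult(mat,x,y);CHKERRQ(ierr);        // operates on myarray in place
    ierr = MatDenseResetArray(mat);CHKERRQ(ierr); // restores the original storage
*/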
/*@
   MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
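/*
  A minimal creation sketch for MatCreateDense() above (hypothetical caller
  code): PETSc chooses the layout and allocates the storage.

    Mat A;
    ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
    // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ...
    ierr = MatDestroy(&A);CHKERRQ(ierr);
*/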
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
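/*
  Algorithm note for MatTransposeMatMultNumeric_MPIDense_MPIDense() below,
  summarizing the existing code: every rank forms its full local contribution
  A_loc^T * B_loc (a cM-by-cN buffer) with one dense GEMM, reorders that
  buffer by the row ownership of C, and a single MPI_Reduce_scatter() then
  sums the contributions while delivering to each rank exactly the rows of C
  it owns.
*/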
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the contributions from all ranks into the local rows of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structures for reusing C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)C->data;
  c->atbdense  = atb;
  atb->destroy = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
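/*
   A minimal sketch of the user-level entry point that reaches the two
   routines above: forming C = A^T * B for MPIDENSE matrices A and B with
   matching row layouts.  The fill argument has no real effect for dense
   products, so PETSC_DEFAULT suffices.

     Mat C;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     // ... reuse the same C after A or B changes numerically ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/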
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) {
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
  }

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  if (flg) {
    alg = 0;
  } else alg = 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structures for reusing C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c            = (Mat_MPIDense*)C->data;
  c->abtdense  = abt;
  abt->destroy = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
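/*
   A sketch of the user-level call that triggers the symbolic routine above:
   C = A * B^T for MPIDENSE A and B with the same local column layout.  The
   communication algorithm can be switched at runtime, e.g. with
   -matmattransmult_mpidense_mpidense_via cyclic (the default is allgatherv).

     Mat C;

     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/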
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *carray;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending; the buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will then become sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr    = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr    = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr   = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size;
  PetscScalar           *sendbuf, *recvbuf;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy the transpose of the local block of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}
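/*
   The cyclic algorithm above overlaps each local GEMM with a ring exchange of
   B blocks.  A stripped-down sketch of just that communication pattern
   (buffer names, count, and tag are illustrative only; note that
   (rank + size + 1) % size above is simply (rank + 1) % size):

     PetscMPIInt rank,size,sendto,recvfrom;
     MPI_Request reqs[2];

     ierr     = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
     ierr     = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
     sendto   = (rank + size - 1) % size;
     recvfrom = (rank + 1) % size;
     for (i = 0; i < size; i++) {
       if (i < size - 1) { // post the next hop of the ring
         ierr = MPI_Isend(sendbuf,count,MPIU_SCALAR,sendto,tag,comm,&reqs[0]);CHKERRQ(ierr);
         ierr = MPI_Irecv(recvbuf,count,MPIU_SCALAR,recvfrom,tag,comm,&reqs[1]);CHKERRQ(ierr);
       }
       // ... compute with sendbuf while the messages are in flight ...
       if (i < size - 1) {
         ierr    = MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
         sendbuf = recvbuf; // the block just received becomes the next send
       }
     }
*/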
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Ae);CHKERRQ(ierr);
  ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Ae);CHKERRQ(ierr);
  ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  ierr = MatCreate(PetscObjectComm((PetscObject)B),&Be);CHKERRQ(ierr);
  ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Be);CHKERRQ(ierr);
  ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  /* compute symbolic Ce = Ae*Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
  ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);

  /* setup C */
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structures for reusing C */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)C->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = C->ops->destroy;
  C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  C->ops->productnumeric = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->productnumeric  = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
#endif
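/*
   When PETSc is configured with Elemental (e.g. --download-elemental), the
   routines above let the user simply call MatMatMult() on two MPIDENSE
   matrices; the product is carried out through the Elemental conversions set
   up in the symbolic phase.  A minimal sketch:

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/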
static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
  C->ops->productnumeric          = MatProductNumeric_AtB;
  C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg  = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  C->ops->productnumeric           = MatProductNumeric_ABt;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
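/*
   A sketch of the MatProduct interface that dispatches into the routines
   above, here for C = A * B^T (MATPRODUCT_ABt); the same pattern applies to
   MATPRODUCT_AtB and, with Elemental, MATPRODUCT_AB.

     Mat C;

     ierr = MatProductCreate(A,B,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_ABt);CHKERRQ(ierr);
     ierr = MatProductSetAlgorithm(C,"cyclic");CHKERRQ(ierr); // or "allgatherv" (the default)
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/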