1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 32 if (flg) *B = mat->A; 33 else *B = A; 34 PetscFunctionReturn(0); 35 } 36 37 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 38 { 39 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 40 PetscErrorCode ierr; 41 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 42 43 PetscFunctionBegin; 44 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 45 lrow = row - rstart; 46 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 51 { 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 56 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 61 { 62 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 63 PetscErrorCode ierr; 64 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 65 PetscScalar *array; 66 MPI_Comm comm; 67 Mat B; 68 69 PetscFunctionBegin; 70 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 71 72 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 73 if (!B) { 74 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 75 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 76 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 77 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 78 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 79 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 80 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 81 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 82 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 84 *a = B; 85 ierr = MatDestroy(&B);CHKERRQ(ierr); 86 } else *a = B; 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 91 { 92 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 93 PetscErrorCode ierr; 94 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 95 PetscBool roworiented = A->roworiented; 96 97 PetscFunctionBegin; 98 for (i=0; i<m; i++) { 99 if (idxm[i] < 0) continue; 100 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 101 if (idxm[i] >= rstart && idxm[i] < rend) { 102 row = idxm[i] - rstart; 103 if (roworiented) { 104 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 105 } else { 106 for (j=0; j<n; j++) { 107 if (idxn[j] < 0) continue; 108 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 109 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 110 } 111 } 112 } else if (!A->donotstash) { 113 mat->assembled = PETSC_FALSE; 114 if (roworiented) { 115 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 116 } else { 117 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 118 } 119 } 120 } 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 125 { 126 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 127 PetscErrorCode ierr; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 130 PetscFunctionBegin; 131 for (i=0; i<m; i++) { 132 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 133 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 134 if (idxm[i] >= rstart && idxm[i] < rend) { 135 row = idxm[i] - rstart; 136 for (j=0; j<n; j++) { 137 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 138 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 139 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 140 } 141 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 142 } 143 PetscFunctionReturn(0); 144 } 145 146 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 147 { 148 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 157 { 158 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 159 PetscErrorCode ierr; 160 161 PetscFunctionBegin; 162 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 167 { 168 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 169 PetscErrorCode ierr; 170 171 PetscFunctionBegin; 172 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 173 PetscFunctionReturn(0); 174 } 175 176 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[]) 177 { 178 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 197 { 198 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 199 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 200 PetscErrorCode ierr; 201 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 202 const PetscInt *irow,*icol; 203 PetscScalar *av,*bv,*v = lmat->v; 204 Mat newmat; 205 IS iscol_local; 206 MPI_Comm comm_is,comm_mat; 207 208 PetscFunctionBegin; 209 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 210 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 211 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 212 213 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 214 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 215 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 216 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 217 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 218 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 219 220 /* No parallel redistribution currently supported! Should really check each index set 221 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 222 original matrix! */ 223 224 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 225 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 226 227 /* Check submatrix call */ 228 if (scall == MAT_REUSE_MATRIX) { 229 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 230 /* Really need to test rows and column sizes! */ 231 newmat = *B; 232 } else { 233 /* Create and fill new matrix */ 234 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 235 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 236 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 237 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 238 } 239 240 /* Now extract the data pointers and do the copy, column at a time */ 241 newmatd = (Mat_MPIDense*)newmat->data; 242 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 243 244 for (i=0; i<Ncols; i++) { 245 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 246 for (j=0; j<nrows; j++) { 247 *bv++ = av[irow[j] - rstart]; 248 } 249 } 250 251 /* Assemble the matrices so that the correct flags are set */ 252 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 253 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 254 255 /* Free work space */ 256 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 257 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 258 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 259 *B = newmat; 260 PetscFunctionReturn(0); 261 } 262 263 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 264 { 265 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 266 PetscErrorCode ierr; 267 268 PetscFunctionBegin; 269 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 276 PetscErrorCode ierr; 277 278 PetscFunctionBegin; 279 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 284 { 285 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 286 MPI_Comm comm; 287 PetscErrorCode ierr; 288 PetscInt nstash,reallocs; 289 InsertMode addv; 290 291 PetscFunctionBegin; 292 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 293 /* make sure all processors are either in INSERTMODE or ADDMODE */ 294 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 295 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 296 mat->insertmode = addv; /* in case this processor had no cache */ 297 298 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 299 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 300 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 305 { 306 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 307 PetscErrorCode ierr; 308 PetscInt i,*row,*col,flg,j,rstart,ncols; 309 PetscMPIInt n; 310 PetscScalar *val; 311 312 PetscFunctionBegin; 313 /* wait on receives */ 314 while (1) { 315 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 316 if (!flg) break; 317 318 for (i=0; i<n;) { 319 /* Now identify the consecutive vals belonging to the same row */ 320 for (j=i,rstart=row[j]; j<n; j++) { 321 if (row[j] != rstart) break; 322 } 323 if (j < n) ncols = j-i; 324 else ncols = n-i; 325 /* Now assemble all these values with a single function call */ 326 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 327 i = j; 328 } 329 } 330 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 331 332 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 333 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 334 335 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 336 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 337 } 338 PetscFunctionReturn(0); 339 } 340 341 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 342 { 343 PetscErrorCode ierr; 344 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 345 346 PetscFunctionBegin; 347 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* the code does not do the diagonal entries correctly unless the 352 matrix is square and the column and row owerships are identical. 353 This is a BUG. The only way to fix it seems to be to access 354 mdn->A and mdn->B directly and not through the MatZeroRows() 355 routine. 356 */ 357 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 358 { 359 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 360 PetscErrorCode ierr; 361 PetscInt i,*owners = A->rmap->range; 362 PetscInt *sizes,j,idx,nsends; 363 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 364 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 365 PetscInt *lens,*lrows,*values; 366 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 367 MPI_Comm comm; 368 MPI_Request *send_waits,*recv_waits; 369 MPI_Status recv_status,*send_status; 370 PetscBool found; 371 const PetscScalar *xx; 372 PetscScalar *bb; 373 374 PetscFunctionBegin; 375 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 376 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 377 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 378 /* first count number of contributors to each processor */ 379 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 380 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 381 for (i=0; i<N; i++) { 382 idx = rows[i]; 383 found = PETSC_FALSE; 384 for (j=0; j<size; j++) { 385 if (idx >= owners[j] && idx < owners[j+1]) { 386 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 387 } 388 } 389 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 390 } 391 nsends = 0; 392 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 393 394 /* inform other processors of number of messages and max length*/ 395 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 396 397 /* post receives: */ 398 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 399 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 400 for (i=0; i<nrecvs; i++) { 401 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 402 } 403 404 /* do sends: 405 1) starts[i] gives the starting index in svalues for stuff going to 406 the ith processor 407 */ 408 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 409 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 410 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 411 412 starts[0] = 0; 413 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 414 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 415 416 starts[0] = 0; 417 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 418 count = 0; 419 for (i=0; i<size; i++) { 420 if (sizes[2*i+1]) { 421 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 422 } 423 } 424 ierr = PetscFree(starts);CHKERRQ(ierr); 425 426 base = owners[rank]; 427 428 /* wait on receives */ 429 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 430 count = nrecvs; 431 slen = 0; 432 while (count) { 433 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 434 /* unpack receives into our local space */ 435 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 436 437 source[imdex] = recv_status.MPI_SOURCE; 438 lens[imdex] = n; 439 slen += n; 440 count--; 441 } 442 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 443 444 /* move the data into the send scatter */ 445 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 446 count = 0; 447 for (i=0; i<nrecvs; i++) { 448 values = rvalues + i*nmax; 449 for (j=0; j<lens[i]; j++) { 450 lrows[count++] = values[j] - base; 451 } 452 } 453 ierr = PetscFree(rvalues);CHKERRQ(ierr); 454 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 455 ierr = PetscFree(owner);CHKERRQ(ierr); 456 ierr = PetscFree(sizes);CHKERRQ(ierr); 457 458 /* fix right hand side if needed */ 459 if (x && b) { 460 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 461 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 462 for (i=0; i<slen; i++) { 463 bb[lrows[i]] = diag*xx[lrows[i]]; 464 } 465 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 466 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 467 } 468 469 /* actually zap the local rows */ 470 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 471 if (diag != 0.0) { 472 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 473 PetscInt m = ll->lda, i; 474 475 for (i=0; i<slen; i++) { 476 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 477 } 478 } 479 ierr = PetscFree(lrows);CHKERRQ(ierr); 480 481 /* wait on sends */ 482 if (nsends) { 483 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 484 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 485 ierr = PetscFree(send_status);CHKERRQ(ierr); 486 } 487 ierr = PetscFree(send_waits);CHKERRQ(ierr); 488 ierr = PetscFree(svalues);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 493 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 494 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 495 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 496 497 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 498 { 499 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 500 PetscErrorCode ierr; 501 502 PetscFunctionBegin; 503 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 504 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 505 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 509 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 510 { 511 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 516 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 517 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 522 { 523 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 524 PetscErrorCode ierr; 525 PetscScalar zero = 0.0; 526 527 PetscFunctionBegin; 528 ierr = VecSet(yy,zero);CHKERRQ(ierr); 529 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 530 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 531 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 536 { 537 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 538 PetscErrorCode ierr; 539 540 PetscFunctionBegin; 541 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 542 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 543 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 544 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 549 { 550 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 551 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 552 PetscErrorCode ierr; 553 PetscInt len,i,n,m = A->rmap->n,radd; 554 PetscScalar *x,zero = 0.0; 555 556 PetscFunctionBegin; 557 ierr = VecSet(v,zero);CHKERRQ(ierr); 558 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 559 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 560 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 561 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 562 radd = A->rmap->rstart*m; 563 for (i=0; i<len; i++) { 564 x[i] = aloc->v[radd + i*m + i]; 565 } 566 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 PetscErrorCode MatDestroy_MPIDense(Mat mat) 571 { 572 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 573 PetscErrorCode ierr; 574 575 PetscFunctionBegin; 576 #if defined(PETSC_USE_LOG) 577 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 578 #endif 579 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 580 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 581 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 582 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 583 584 ierr = PetscFree(mat->data);CHKERRQ(ierr); 585 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 586 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 593 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 594 #if defined(PETSC_HAVE_ELEMENTAL) 595 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 596 #endif 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 604 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 612 613 #include <petscdraw.h> 614 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 615 { 616 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 617 PetscErrorCode ierr; 618 PetscMPIInt rank = mdn->rank; 619 PetscViewerType vtype; 620 PetscBool iascii,isdraw; 621 PetscViewer sviewer; 622 PetscViewerFormat format; 623 624 PetscFunctionBegin; 625 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 626 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 627 if (iascii) { 628 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 629 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 630 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 631 MatInfo info; 632 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 633 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 634 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 635 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 636 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 637 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } else if (format == PETSC_VIEWER_ASCII_INFO) { 640 PetscFunctionReturn(0); 641 } 642 } else if (isdraw) { 643 PetscDraw draw; 644 PetscBool isnull; 645 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 648 if (isnull) PetscFunctionReturn(0); 649 } 650 651 { 652 /* assemble the entire matrix onto first processor. */ 653 Mat A; 654 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 655 PetscInt *cols; 656 PetscScalar *vals; 657 658 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 659 if (!rank) { 660 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 661 } else { 662 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 663 } 664 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 665 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 666 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 667 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 668 669 /* Copy the matrix ... This isn't the most efficient means, 670 but it's quick for now */ 671 A->insertmode = INSERT_VALUES; 672 673 row = mat->rmap->rstart; 674 m = mdn->A->rmap->n; 675 for (i=0; i<m; i++) { 676 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 677 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 678 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 679 row++; 680 } 681 682 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 683 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 684 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 685 if (!rank) { 686 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 687 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 688 } 689 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 690 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 691 ierr = MatDestroy(&A);CHKERRQ(ierr); 692 } 693 PetscFunctionReturn(0); 694 } 695 696 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 697 { 698 PetscErrorCode ierr; 699 PetscBool iascii,isbinary,isdraw,issocket; 700 701 PetscFunctionBegin; 702 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 703 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 704 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 705 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 706 707 if (iascii || issocket || isdraw) { 708 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 709 } else if (isbinary) { 710 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 711 } 712 PetscFunctionReturn(0); 713 } 714 715 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 716 { 717 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 718 Mat mdn = mat->A; 719 PetscErrorCode ierr; 720 PetscLogDouble isend[5],irecv[5]; 721 722 PetscFunctionBegin; 723 info->block_size = 1.0; 724 725 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 726 727 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 728 isend[3] = info->memory; isend[4] = info->mallocs; 729 if (flag == MAT_LOCAL) { 730 info->nz_used = isend[0]; 731 info->nz_allocated = isend[1]; 732 info->nz_unneeded = isend[2]; 733 info->memory = isend[3]; 734 info->mallocs = isend[4]; 735 } else if (flag == MAT_GLOBAL_MAX) { 736 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 737 738 info->nz_used = irecv[0]; 739 info->nz_allocated = irecv[1]; 740 info->nz_unneeded = irecv[2]; 741 info->memory = irecv[3]; 742 info->mallocs = irecv[4]; 743 } else if (flag == MAT_GLOBAL_SUM) { 744 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 745 746 info->nz_used = irecv[0]; 747 info->nz_allocated = irecv[1]; 748 info->nz_unneeded = irecv[2]; 749 info->memory = irecv[3]; 750 info->mallocs = irecv[4]; 751 } 752 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 753 info->fill_ratio_needed = 0; 754 info->factor_mallocs = 0; 755 PetscFunctionReturn(0); 756 } 757 758 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 759 { 760 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 switch (op) { 765 case MAT_NEW_NONZERO_LOCATIONS: 766 case MAT_NEW_NONZERO_LOCATION_ERR: 767 case MAT_NEW_NONZERO_ALLOCATION_ERR: 768 MatCheckPreallocated(A,1); 769 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 770 break; 771 case MAT_ROW_ORIENTED: 772 MatCheckPreallocated(A,1); 773 a->roworiented = flg; 774 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 775 break; 776 case MAT_NEW_DIAGONALS: 777 case MAT_KEEP_NONZERO_PATTERN: 778 case MAT_USE_HASH_TABLE: 779 case MAT_SORTED_FULL: 780 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 781 break; 782 case MAT_IGNORE_OFF_PROC_ENTRIES: 783 a->donotstash = flg; 784 break; 785 case MAT_SYMMETRIC: 786 case MAT_STRUCTURALLY_SYMMETRIC: 787 case MAT_HERMITIAN: 788 case MAT_SYMMETRY_ETERNAL: 789 case MAT_IGNORE_LOWER_TRIANGULAR: 790 case MAT_IGNORE_ZERO_ENTRIES: 791 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 792 break; 793 default: 794 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 800 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 801 { 802 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 803 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 804 const PetscScalar *l,*r; 805 PetscScalar x,*v; 806 PetscErrorCode ierr; 807 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 808 809 PetscFunctionBegin; 810 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 811 if (ll) { 812 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 813 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 814 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 815 for (i=0; i<m; i++) { 816 x = l[i]; 817 v = mat->v + i; 818 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 819 } 820 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 821 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 822 } 823 if (rr) { 824 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 825 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 826 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 829 for (i=0; i<n; i++) { 830 x = r[i]; 831 v = mat->v + i*m; 832 for (j=0; j<m; j++) (*v++) *= x; 833 } 834 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 835 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 836 } 837 PetscFunctionReturn(0); 838 } 839 840 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 841 { 842 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 843 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 844 PetscErrorCode ierr; 845 PetscInt i,j; 846 PetscReal sum = 0.0; 847 PetscScalar *v = mat->v; 848 849 PetscFunctionBegin; 850 if (mdn->size == 1) { 851 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 852 } else { 853 if (type == NORM_FROBENIUS) { 854 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 855 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 856 } 857 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 858 *nrm = PetscSqrtReal(*nrm); 859 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 860 } else if (type == NORM_1) { 861 PetscReal *tmp,*tmp2; 862 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 863 *nrm = 0.0; 864 v = mat->v; 865 for (j=0; j<mdn->A->cmap->n; j++) { 866 for (i=0; i<mdn->A->rmap->n; i++) { 867 tmp[j] += PetscAbsScalar(*v); v++; 868 } 869 } 870 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 871 for (j=0; j<A->cmap->N; j++) { 872 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 873 } 874 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 875 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 876 } else if (type == NORM_INFINITY) { /* max row norm */ 877 PetscReal ntemp; 878 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 879 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 880 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 881 } 882 PetscFunctionReturn(0); 883 } 884 885 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 886 { 887 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 888 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 889 Mat B; 890 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 891 PetscErrorCode ierr; 892 PetscInt j,i; 893 PetscScalar *v; 894 895 PetscFunctionBegin; 896 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 897 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 898 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 899 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 900 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 901 } else { 902 B = *matout; 903 } 904 905 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 906 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 907 for (i=0; i<m; i++) rwork[i] = rstart + i; 908 for (j=0; j<n; j++) { 909 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 910 v += m; 911 } 912 ierr = PetscFree(rwork);CHKERRQ(ierr); 913 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 915 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 916 *matout = B; 917 } else { 918 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 919 } 920 PetscFunctionReturn(0); 921 } 922 923 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 924 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 925 926 PetscErrorCode MatSetUp_MPIDense(Mat A) 927 { 928 PetscErrorCode ierr; 929 930 PetscFunctionBegin; 931 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 935 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 936 { 937 PetscErrorCode ierr; 938 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 939 940 PetscFunctionBegin; 941 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 942 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 PetscErrorCode MatConjugate_MPIDense(Mat mat) 947 { 948 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 ierr = MatConjugate(a->A);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatRealPart_MPIDense(Mat A) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 ierr = MatRealPart(a->A);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 967 { 968 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 977 { 978 PetscErrorCode ierr; 979 PetscScalar *x; 980 const PetscScalar *a; 981 PetscInt lda; 982 983 PetscFunctionBegin; 984 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 985 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 986 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 987 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 988 ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr); 989 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 990 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 995 996 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 997 { 998 PetscErrorCode ierr; 999 PetscInt i,n; 1000 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1001 PetscReal *work; 1002 1003 PetscFunctionBegin; 1004 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1005 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1006 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1007 if (type == NORM_2) { 1008 for (i=0; i<n; i++) work[i] *= work[i]; 1009 } 1010 if (type == NORM_INFINITY) { 1011 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1012 } else { 1013 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1014 } 1015 ierr = PetscFree(work);CHKERRQ(ierr); 1016 if (type == NORM_2) { 1017 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1018 } 1019 PetscFunctionReturn(0); 1020 } 1021 1022 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1023 { 1024 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1025 PetscErrorCode ierr; 1026 PetscScalar *a; 1027 PetscInt m,n,i; 1028 1029 PetscFunctionBegin; 1030 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1031 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1032 for (i=0; i<m*n; i++) { 1033 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1034 } 1035 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1040 1041 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1042 { 1043 PetscFunctionBegin; 1044 *missing = PETSC_FALSE; 1045 PetscFunctionReturn(0); 1046 } 1047 1048 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1049 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1050 1051 /* -------------------------------------------------------------------*/ 1052 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1053 MatGetRow_MPIDense, 1054 MatRestoreRow_MPIDense, 1055 MatMult_MPIDense, 1056 /* 4*/ MatMultAdd_MPIDense, 1057 MatMultTranspose_MPIDense, 1058 MatMultTransposeAdd_MPIDense, 1059 0, 1060 0, 1061 0, 1062 /* 10*/ 0, 1063 0, 1064 0, 1065 0, 1066 MatTranspose_MPIDense, 1067 /* 15*/ MatGetInfo_MPIDense, 1068 MatEqual_MPIDense, 1069 MatGetDiagonal_MPIDense, 1070 MatDiagonalScale_MPIDense, 1071 MatNorm_MPIDense, 1072 /* 20*/ MatAssemblyBegin_MPIDense, 1073 MatAssemblyEnd_MPIDense, 1074 MatSetOption_MPIDense, 1075 MatZeroEntries_MPIDense, 1076 /* 24*/ MatZeroRows_MPIDense, 1077 0, 1078 0, 1079 0, 1080 0, 1081 /* 29*/ MatSetUp_MPIDense, 1082 0, 1083 0, 1084 MatGetDiagonalBlock_MPIDense, 1085 0, 1086 /* 34*/ MatDuplicate_MPIDense, 1087 0, 1088 0, 1089 0, 1090 0, 1091 /* 39*/ MatAXPY_MPIDense, 1092 MatCreateSubMatrices_MPIDense, 1093 0, 1094 MatGetValues_MPIDense, 1095 0, 1096 /* 44*/ 0, 1097 MatScale_MPIDense, 1098 MatShift_Basic, 1099 0, 1100 0, 1101 /* 49*/ MatSetRandom_MPIDense, 1102 0, 1103 0, 1104 0, 1105 0, 1106 /* 54*/ 0, 1107 0, 1108 0, 1109 0, 1110 0, 1111 /* 59*/ MatCreateSubMatrix_MPIDense, 1112 MatDestroy_MPIDense, 1113 MatView_MPIDense, 1114 0, 1115 0, 1116 /* 64*/ 0, 1117 0, 1118 0, 1119 0, 1120 0, 1121 /* 69*/ 0, 1122 0, 1123 0, 1124 0, 1125 0, 1126 /* 74*/ 0, 1127 0, 1128 0, 1129 0, 1130 0, 1131 /* 79*/ 0, 1132 0, 1133 0, 1134 0, 1135 /* 83*/ MatLoad_MPIDense, 1136 0, 1137 0, 1138 0, 1139 0, 1140 0, 1141 #if defined(PETSC_HAVE_ELEMENTAL) 1142 /* 89*/ 0, 1143 0, 1144 #else 1145 /* 89*/ 0, 1146 0, 1147 #endif 1148 MatMatMultNumeric_MPIDense, 1149 0, 1150 0, 1151 /* 94*/ 0, 1152 0, 1153 0, 1154 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1155 0, 1156 /* 99*/ MatProductSetFromOptions_MPIDense, 1157 0, 1158 0, 1159 MatConjugate_MPIDense, 1160 0, 1161 /*104*/ 0, 1162 MatRealPart_MPIDense, 1163 MatImaginaryPart_MPIDense, 1164 0, 1165 0, 1166 /*109*/ 0, 1167 0, 1168 0, 1169 MatGetColumnVector_MPIDense, 1170 MatMissingDiagonal_MPIDense, 1171 /*114*/ 0, 1172 0, 1173 0, 1174 0, 1175 0, 1176 /*119*/ 0, 1177 0, 1178 0, 1179 0, 1180 0, 1181 /*124*/ 0, 1182 MatGetColumnNorms_MPIDense, 1183 0, 1184 0, 1185 0, 1186 /*129*/ 0, 1187 0, 1188 0, 1189 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1190 0, 1191 /*134*/ 0, 1192 0, 1193 0, 1194 0, 1195 0, 1196 /*139*/ 0, 1197 0, 1198 0, 1199 0, 1200 0, 1201 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1202 /*145*/ 0, 1203 0, 1204 0 1205 }; 1206 1207 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1208 { 1209 Mat_MPIDense *a; 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated"); 1214 mat->preallocated = PETSC_TRUE; 1215 /* Note: For now, when data is specified above, this assumes the user correctly 1216 allocates the local dense storage space. We should add error checking. */ 1217 1218 a = (Mat_MPIDense*)mat->data; 1219 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1220 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1221 a->nvec = mat->cmap->n; 1222 1223 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1224 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1225 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1226 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1227 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #if defined(PETSC_HAVE_ELEMENTAL) 1232 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1233 { 1234 Mat mat_elemental; 1235 PetscErrorCode ierr; 1236 PetscScalar *v; 1237 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1238 1239 PetscFunctionBegin; 1240 if (reuse == MAT_REUSE_MATRIX) { 1241 mat_elemental = *newmat; 1242 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1243 } else { 1244 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1245 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1246 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1247 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1248 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1249 } 1250 1251 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1252 for (i=0; i<N; i++) cols[i] = i; 1253 for (i=0; i<m; i++) rows[i] = rstart + i; 1254 1255 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1256 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1257 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1258 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1259 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1260 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1261 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1262 1263 if (reuse == MAT_INPLACE_MATRIX) { 1264 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1265 } else { 1266 *newmat = mat_elemental; 1267 } 1268 PetscFunctionReturn(0); 1269 } 1270 #endif 1271 1272 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1273 { 1274 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1275 PetscErrorCode ierr; 1276 1277 PetscFunctionBegin; 1278 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1283 { 1284 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1293 { 1294 PetscErrorCode ierr; 1295 Mat_MPIDense *mat; 1296 PetscInt m,nloc,N; 1297 1298 PetscFunctionBegin; 1299 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1300 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1301 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1302 PetscInt sum; 1303 1304 if (n == PETSC_DECIDE) { 1305 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1306 } 1307 /* Check sum(n) = N */ 1308 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1309 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1310 1311 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1312 } 1313 1314 /* numeric phase */ 1315 mat = (Mat_MPIDense*)(*outmat)->data; 1316 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1317 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1318 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1323 { 1324 Mat_MPIDense *a; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1329 mat->data = (void*)a; 1330 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1331 1332 mat->insertmode = NOT_SET_VALUES; 1333 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1334 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1335 1336 /* build cache for off array entries formed */ 1337 a->donotstash = PETSC_FALSE; 1338 1339 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1340 1341 /* stuff used for matrix vector multiply */ 1342 a->lvec = 0; 1343 a->Mvctx = 0; 1344 a->roworiented = PETSC_TRUE; 1345 1346 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1347 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1348 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1349 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1350 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1351 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1352 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1353 #if defined(PETSC_HAVE_ELEMENTAL) 1354 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1355 #endif 1356 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1357 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1358 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1359 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1360 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1361 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr); 1362 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr); 1363 1364 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1365 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1366 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1367 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1368 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 /*MC 1373 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1374 1375 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1376 and MATMPIDENSE otherwise. 1377 1378 Options Database Keys: 1379 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1380 1381 Level: beginner 1382 1383 1384 .seealso: MATSEQDENSE,MATMPIDENSE 1385 M*/ 1386 1387 /*@C 1388 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1389 1390 Not collective 1391 1392 Input Parameters: 1393 . B - the matrix 1394 - data - optional location of matrix data. Set data=NULL for PETSc 1395 to control all matrix memory allocation. 1396 1397 Notes: 1398 The dense format is fully compatible with standard Fortran 77 1399 storage by columns. 1400 1401 The data input variable is intended primarily for Fortran programmers 1402 who wish to allocate their own matrix memory space. Most users should 1403 set data=NULL. 1404 1405 Level: intermediate 1406 1407 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1408 @*/ 1409 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1410 { 1411 PetscErrorCode ierr; 1412 1413 PetscFunctionBegin; 1414 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /*@ 1419 MatDensePlaceArray - Allows one to replace the array in a dense array with an 1420 array provided by the user. This is useful to avoid copying an array 1421 into a matrix 1422 1423 Not Collective 1424 1425 Input Parameters: 1426 + mat - the matrix 1427 - array - the array in column major order 1428 1429 Notes: 1430 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1431 freed when the matrix is destroyed. 1432 1433 Level: developer 1434 1435 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1436 1437 @*/ 1438 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[]) 1439 { 1440 PetscErrorCode ierr; 1441 PetscFunctionBegin; 1442 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1443 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /*@ 1448 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1449 1450 Not Collective 1451 1452 Input Parameters: 1453 . mat - the matrix 1454 1455 Notes: 1456 You can only call this after a call to MatDensePlaceArray() 1457 1458 Level: developer 1459 1460 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1461 1462 @*/ 1463 PetscErrorCode MatDenseResetArray(Mat mat) 1464 { 1465 PetscErrorCode ierr; 1466 PetscFunctionBegin; 1467 ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1468 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 /*@C 1473 MatCreateDense - Creates a parallel matrix in dense format. 1474 1475 Collective 1476 1477 Input Parameters: 1478 + comm - MPI communicator 1479 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1480 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1481 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1482 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1483 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1484 to control all matrix memory allocation. 1485 1486 Output Parameter: 1487 . A - the matrix 1488 1489 Notes: 1490 The dense format is fully compatible with standard Fortran 77 1491 storage by columns. 1492 1493 The data input variable is intended primarily for Fortran programmers 1494 who wish to allocate their own matrix memory space. Most users should 1495 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1496 1497 The user MUST specify either the local or global matrix dimensions 1498 (possibly both). 1499 1500 Level: intermediate 1501 1502 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1503 @*/ 1504 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1505 { 1506 PetscErrorCode ierr; 1507 PetscMPIInt size; 1508 1509 PetscFunctionBegin; 1510 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1511 PetscValidLogicalCollectiveBool(*A,!!data,6); 1512 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1513 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1514 if (size > 1) { 1515 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1516 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1517 if (data) { /* user provided data array, so no need to assemble */ 1518 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1519 (*A)->assembled = PETSC_TRUE; 1520 } 1521 } else { 1522 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1523 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1524 } 1525 PetscFunctionReturn(0); 1526 } 1527 1528 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1529 { 1530 Mat mat; 1531 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1532 PetscErrorCode ierr; 1533 1534 PetscFunctionBegin; 1535 *newmat = 0; 1536 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1537 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1538 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1539 a = (Mat_MPIDense*)mat->data; 1540 1541 mat->factortype = A->factortype; 1542 mat->assembled = PETSC_TRUE; 1543 mat->preallocated = PETSC_TRUE; 1544 1545 a->size = oldmat->size; 1546 a->rank = oldmat->rank; 1547 mat->insertmode = NOT_SET_VALUES; 1548 a->nvec = oldmat->nvec; 1549 a->donotstash = oldmat->donotstash; 1550 1551 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1552 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1553 1554 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1555 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1556 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1557 1558 *newmat = mat; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1563 { 1564 PetscErrorCode ierr; 1565 PetscBool isbinary; 1566 #if defined(PETSC_HAVE_HDF5) 1567 PetscBool ishdf5; 1568 #endif 1569 1570 PetscFunctionBegin; 1571 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 1572 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 1573 /* force binary viewer to load .info file if it has not yet done so */ 1574 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1575 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1576 #if defined(PETSC_HAVE_HDF5) 1577 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 1578 #endif 1579 if (isbinary) { 1580 ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 1581 #if defined(PETSC_HAVE_HDF5) 1582 } else if (ishdf5) { 1583 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 1584 #endif 1585 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1590 { 1591 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1592 Mat a,b; 1593 PetscBool flg; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 a = matA->A; 1598 b = matB->A; 1599 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1600 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1605 { 1606 PetscErrorCode ierr; 1607 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1608 Mat_TransMatMultDense *atb = a->atbdense; 1609 1610 PetscFunctionBegin; 1611 ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1612 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1613 ierr = PetscFree(atb);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 1618 { 1619 PetscErrorCode ierr; 1620 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1621 Mat_MatTransMultDense *abt = a->abtdense; 1622 1623 PetscFunctionBegin; 1624 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 1625 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 1626 ierr = (abt->destroy)(A);CHKERRQ(ierr); 1627 ierr = PetscFree(abt);CHKERRQ(ierr); 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1632 { 1633 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1634 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1635 Mat_TransMatMultDense *atb = c->atbdense; 1636 PetscErrorCode ierr; 1637 MPI_Comm comm; 1638 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1639 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1640 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1641 PetscScalar _DOne=1.0,_DZero=0.0; 1642 PetscBLASInt am,an,bn,aN; 1643 const PetscInt *ranges; 1644 1645 PetscFunctionBegin; 1646 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1647 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1648 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1649 1650 /* compute atbarray = aseq^T * bseq */ 1651 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1652 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1653 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1654 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1655 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1656 1657 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1658 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1659 1660 /* arrange atbarray into sendbuf */ 1661 k = 0; 1662 for (proc=0; proc<size; proc++) { 1663 for (j=0; j<cN; j++) { 1664 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1665 } 1666 } 1667 /* sum all atbarray to local values of C */ 1668 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1669 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1670 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 1675 { 1676 PetscErrorCode ierr; 1677 MPI_Comm comm; 1678 PetscMPIInt size; 1679 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1680 Mat_MPIDense *c; 1681 Mat_TransMatMultDense *atb; 1682 1683 PetscFunctionBegin; 1684 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1685 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1686 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1687 } 1688 1689 /* create matrix product C */ 1690 ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1691 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 1692 ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr); 1693 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1695 1696 /* create data structure for reuse C */ 1697 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1698 ierr = PetscNew(&atb);CHKERRQ(ierr); 1699 cM = C->rmap->N; 1700 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1701 1702 c = (Mat_MPIDense*)C->data; 1703 c->atbdense = atb; 1704 atb->destroy = C->ops->destroy; 1705 C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1706 PetscFunctionReturn(0); 1707 } 1708 1709 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 1710 { 1711 PetscErrorCode ierr; 1712 MPI_Comm comm; 1713 PetscMPIInt i, size; 1714 PetscInt maxRows, bufsiz; 1715 Mat_MPIDense *c; 1716 PetscMPIInt tag; 1717 PetscInt alg; 1718 Mat_MatTransMultDense *abt; 1719 Mat_Product *product = C->product; 1720 PetscBool flg; 1721 1722 PetscFunctionBegin; 1723 /* check local size of A and B */ 1724 if (A->cmap->n != B->cmap->n) { 1725 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 1726 } 1727 1728 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 1729 if (flg) { 1730 alg = 0; 1731 } else alg = 1; 1732 1733 /* setup matrix product C */ 1734 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 1735 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 1736 ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr); 1737 ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr); 1738 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1739 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1740 1741 /* create data structure for reuse C */ 1742 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 1743 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1744 ierr = PetscNew(&abt);CHKERRQ(ierr); 1745 abt->tag = tag; 1746 abt->alg = alg; 1747 switch (alg) { 1748 case 1: /* alg: "cyclic" */ 1749 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 1750 bufsiz = A->cmap->N * maxRows; 1751 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 1752 break; 1753 default: /* alg: "allgatherv" */ 1754 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 1755 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 1756 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 1757 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 1758 break; 1759 } 1760 1761 c = (Mat_MPIDense*)C->data; 1762 c->abtdense = abt; 1763 abt->destroy = C->ops->destroy; 1764 C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 1765 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 1766 PetscFunctionReturn(0); 1767 } 1768 1769 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 1770 { 1771 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1772 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1773 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 1774 Mat_MatTransMultDense *abt = c->abtdense; 1775 PetscErrorCode ierr; 1776 MPI_Comm comm; 1777 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 1778 PetscScalar *sendbuf, *recvbuf=0, *carray; 1779 PetscInt i,cK=A->cmap->N,k,j,bn; 1780 PetscScalar _DOne=1.0,_DZero=0.0; 1781 PetscBLASInt cm, cn, ck; 1782 MPI_Request reqs[2]; 1783 const PetscInt *ranges; 1784 1785 PetscFunctionBegin; 1786 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1787 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1788 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1789 1790 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 1791 bn = B->rmap->n; 1792 if (bseq->lda == bn) { 1793 sendbuf = bseq->v; 1794 } else { 1795 sendbuf = abt->buf[0]; 1796 for (k = 0, i = 0; i < cK; i++) { 1797 for (j = 0; j < bn; j++, k++) { 1798 sendbuf[k] = bseq->v[i * bseq->lda + j]; 1799 } 1800 } 1801 } 1802 if (size > 1) { 1803 sendto = (rank + size - 1) % size; 1804 recvfrom = (rank + size + 1) % size; 1805 } else { 1806 sendto = recvfrom = 0; 1807 } 1808 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 1809 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 1810 recvisfrom = rank; 1811 for (i = 0; i < size; i++) { 1812 /* we have finished receiving in sending, bufs can be read/modified */ 1813 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 1814 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 1815 1816 if (nextrecvisfrom != rank) { 1817 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 1818 sendsiz = cK * bn; 1819 recvsiz = cK * nextbn; 1820 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 1821 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 1822 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 1823 } 1824 1825 /* local aseq * sendbuf^T */ 1826 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 1827 carray = &cseq->v[cseq->lda * ranges[recvisfrom]]; 1828 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda)); 1829 1830 if (nextrecvisfrom != rank) { 1831 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 1832 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1833 } 1834 bn = nextbn; 1835 recvisfrom = nextrecvisfrom; 1836 sendbuf = recvbuf; 1837 } 1838 PetscFunctionReturn(0); 1839 } 1840 1841 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 1842 { 1843 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1844 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1845 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 1846 Mat_MatTransMultDense *abt = c->abtdense; 1847 PetscErrorCode ierr; 1848 MPI_Comm comm; 1849 PetscMPIInt rank,size; 1850 PetscScalar *sendbuf, *recvbuf; 1851 PetscInt i,cK=A->cmap->N,k,j,bn; 1852 PetscScalar _DOne=1.0,_DZero=0.0; 1853 PetscBLASInt cm, cn, ck; 1854 1855 PetscFunctionBegin; 1856 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1857 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1858 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1859 1860 /* copy transpose of B into buf[0] */ 1861 bn = B->rmap->n; 1862 sendbuf = abt->buf[0]; 1863 recvbuf = abt->buf[1]; 1864 for (k = 0, j = 0; j < bn; j++) { 1865 for (i = 0; i < cK; i++, k++) { 1866 sendbuf[k] = bseq->v[i * bseq->lda + j]; 1867 } 1868 } 1869 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 1870 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 1872 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 1873 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda)); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 1878 { 1879 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1880 Mat_MatTransMultDense *abt = c->abtdense; 1881 PetscErrorCode ierr; 1882 1883 PetscFunctionBegin; 1884 switch (abt->alg) { 1885 case 1: 1886 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 1887 break; 1888 default: 1889 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 1890 break; 1891 } 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 1896 { 1897 PetscErrorCode ierr; 1898 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1899 Mat_MatMultDense *ab = a->abdense; 1900 1901 PetscFunctionBegin; 1902 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 1903 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 1904 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 1905 1906 ierr = (ab->destroy)(A);CHKERRQ(ierr); 1907 ierr = PetscFree(ab);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #if defined(PETSC_HAVE_ELEMENTAL) 1912 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1913 { 1914 PetscErrorCode ierr; 1915 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1916 Mat_MatMultDense *ab=c->abdense; 1917 1918 PetscFunctionBegin; 1919 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 1920 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 1921 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 1922 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1923 PetscFunctionReturn(0); 1924 } 1925 1926 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 1927 { 1928 PetscErrorCode ierr; 1929 Mat Ae,Be,Ce; 1930 Mat_MPIDense *c; 1931 Mat_MatMultDense *ab; 1932 1933 PetscFunctionBegin; 1934 /* check local size of A and B */ 1935 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 1936 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1937 } 1938 1939 /* create elemental matrices Ae and Be */ 1940 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 1941 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1942 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 1943 ierr = MatSetUp(Ae);CHKERRQ(ierr); 1944 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1945 1946 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 1947 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 1948 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 1949 ierr = MatSetUp(Be);CHKERRQ(ierr); 1950 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1951 1952 /* compute symbolic Ce = Ae*Be */ 1953 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 1954 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 1955 1956 /* setup C */ 1957 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1958 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 1959 ierr = MatSetUp(C);CHKERRQ(ierr); 1960 1961 /* create data structure for reuse Cdense */ 1962 ierr = PetscNew(&ab);CHKERRQ(ierr); 1963 c = (Mat_MPIDense*)C->data; 1964 c->abdense = ab; 1965 1966 ab->Ae = Ae; 1967 ab->Be = Be; 1968 ab->Ce = Ce; 1969 ab->destroy = C->ops->destroy; 1970 C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 1971 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 1972 C->ops->productnumeric = MatProductNumeric_AB; 1973 PetscFunctionReturn(0); 1974 } 1975 #endif 1976 /* ----------------------------------------------- */ 1977 #if defined(PETSC_HAVE_ELEMENTAL) 1978 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 1979 { 1980 PetscFunctionBegin; 1981 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 1982 C->ops->productsymbolic = MatProductSymbolic_AB; 1983 C->ops->productnumeric = MatProductNumeric_AB; 1984 PetscFunctionReturn(0); 1985 } 1986 #endif 1987 1988 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C) 1989 { 1990 PetscErrorCode ierr; 1991 Mat_Product *product = C->product; 1992 1993 PetscFunctionBegin; 1994 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr); 1995 C->ops->productnumeric = MatProductNumeric_AtB; 1996 C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense; 1997 PetscFunctionReturn(0); 1998 } 1999 2000 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2001 { 2002 Mat_Product *product = C->product; 2003 Mat A = product->A,B=product->B; 2004 2005 PetscFunctionBegin; 2006 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2007 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2008 2009 C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense; 2010 PetscFunctionReturn(0); 2011 } 2012 2013 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2014 { 2015 PetscErrorCode ierr; 2016 Mat_Product *product = C->product; 2017 const char *algTypes[2] = {"allgatherv","cyclic"}; 2018 PetscInt alg,nalg = 2; 2019 PetscBool flg = PETSC_FALSE; 2020 2021 PetscFunctionBegin; 2022 /* Set default algorithm */ 2023 alg = 0; /* default is allgatherv */ 2024 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2025 if (flg) { 2026 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2027 } 2028 2029 /* Get runtime option */ 2030 if (product->api_user) { 2031 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2032 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2033 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2034 } else { 2035 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2036 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2037 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2038 } 2039 if (flg) { 2040 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2041 } 2042 2043 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2044 C->ops->productsymbolic = MatProductSymbolic_ABt; 2045 C->ops->productnumeric = MatProductNumeric_ABt; 2046 PetscFunctionReturn(0); 2047 } 2048 2049 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2050 { 2051 PetscErrorCode ierr; 2052 Mat_Product *product = C->product; 2053 2054 PetscFunctionBegin; 2055 switch (product->type) { 2056 #if defined(PETSC_HAVE_ELEMENTAL) 2057 case MATPRODUCT_AB: 2058 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2059 break; 2060 #endif 2061 case MATPRODUCT_AtB: 2062 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2063 break; 2064 case MATPRODUCT_ABt: 2065 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2066 break; 2067 default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type is not supported"); 2068 } 2069 PetscFunctionReturn(0); 2070 } 2071