1 #ifndef lint 2 static char vcid[] = "$Id: mpidense.c,v 1.7 1995/11/01 19:10:02 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpidense.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n, 10 int *idxn,Scalar *v,InsertMode addv) 11 { 12 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 13 int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 14 15 if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) { 16 SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds"); 17 } 18 mdn->insertmode = addv; 19 for ( i=0; i<m; i++ ) { 20 if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row"); 21 if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large"); 22 if (idxm[i] >= rstart && idxm[i] < rend) { 23 row = idxm[i] - rstart; 24 for ( j=0; j<n; j++ ) { 25 if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column"); 26 if (idxn[j] >= mdn->N) 27 SETERRQ(1,"MatSetValues_MPIDense:Column too large"); 28 ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv); 29 CHKERRQ(ierr); 30 } 31 } 32 else { 33 ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv); 34 CHKERRQ(ierr); 35 } 36 } 37 return 0; 38 } 39 40 static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 41 { 42 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 43 MPI_Comm comm = mat->comm; 44 int size = mdn->size, *owners = mdn->rowners, rank = mdn->rank; 45 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 46 int tag = mat->tag, *owner,*starts,count,ierr; 47 InsertMode addv; 48 MPI_Request *send_waits,*recv_waits; 49 Scalar *rvalues,*svalues; 50 51 /* make sure all processors are either in INSERTMODE or ADDMODE */ 52 MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT, 53 MPI_BOR,comm); 54 if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1, 55 "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs"); 56 } 57 mdn->insertmode = addv; /* in case this processor had no cache */ 58 59 /* first count number of contributors to each processor */ 60 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 61 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 62 owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 63 for ( i=0; i<mdn->stash.n; i++ ) { 64 idx = mdn->stash.idx[i]; 65 for ( j=0; j<size; j++ ) { 66 if (idx >= owners[j] && idx < owners[j+1]) { 67 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 68 } 69 } 70 } 71 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 72 73 /* inform other processors of number of messages and max length*/ 74 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 75 MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm); 76 nreceives = work[rank]; 77 MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm); 78 nmax = work[rank]; 79 PetscFree(work); 80 81 /* post receives: 82 1) each message will consist of ordered pairs 83 (global index,value) we store the global index as a double 84 to simplify the message passing. 85 2) since we don't know how long each individual message is we 86 allocate the largest needed buffer for each receive. Potentially 87 this is a lot of wasted space. 88 89 This could be done better. 90 */ 91 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 92 CHKPTRQ(rvalues); 93 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 94 CHKPTRQ(recv_waits); 95 for ( i=0; i<nreceives; i++ ) { 96 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 97 comm,recv_waits+i); 98 } 99 100 /* do sends: 101 1) starts[i] gives the starting index in svalues for stuff going to 102 the ith processor 103 */ 104 svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) ); 105 CHKPTRQ(svalues); 106 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 107 CHKPTRQ(send_waits); 108 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 109 starts[0] = 0; 110 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 111 for ( i=0; i<mdn->stash.n; i++ ) { 112 svalues[3*starts[owner[i]]] = (Scalar) mdn->stash.idx[i]; 113 svalues[3*starts[owner[i]]+1] = (Scalar) mdn->stash.idy[i]; 114 svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i]; 115 } 116 PetscFree(owner); 117 starts[0] = 0; 118 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 119 count = 0; 120 for ( i=0; i<size; i++ ) { 121 if (procs[i]) { 122 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 123 comm,send_waits+count++); 124 } 125 } 126 PetscFree(starts); PetscFree(nprocs); 127 128 /* Free cache space */ 129 ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr); 130 131 mdn->svalues = svalues; mdn->rvalues = rvalues; 132 mdn->nsends = nsends; mdn->nrecvs = nreceives; 133 mdn->send_waits = send_waits; mdn->recv_waits = recv_waits; 134 mdn->rmax = nmax; 135 136 return 0; 137 } 138 extern int MatSetUpMultiply_MPIDense(Mat); 139 140 static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 141 { 142 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 143 MPI_Status *send_status,recv_status; 144 int imdex, nrecvs=mdn->nrecvs, count=nrecvs, i, n, ierr, row, col; 145 Scalar *values,val; 146 InsertMode addv = mdn->insertmode; 147 148 /* wait on receives */ 149 while (count) { 150 MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status); 151 /* unpack receives into our local space */ 152 values = mdn->rvalues + 3*imdex*mdn->rmax; 153 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 154 n = n/3; 155 for ( i=0; i<n; i++ ) { 156 row = (int) PETSCREAL(values[3*i]) - mdn->rstart; 157 col = (int) PETSCREAL(values[3*i+1]); 158 val = values[3*i+2]; 159 if (col >= 0 && col < mdn->N) { 160 MatSetValues(mdn->A,1,&row,1,&col,&val,addv); 161 } 162 else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");} 163 } 164 count--; 165 } 166 PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues); 167 168 /* wait on sends */ 169 if (mdn->nsends) { 170 send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) ); 171 CHKPTRQ(send_status); 172 MPI_Waitall(mdn->nsends,mdn->send_waits,send_status); 173 PetscFree(send_status); 174 } 175 PetscFree(mdn->send_waits); PetscFree(mdn->svalues); 176 177 mdn->insertmode = NOT_SET_VALUES; 178 ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr); 180 181 if (!mdn->assembled && mode == FINAL_ASSEMBLY) { 182 ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr); 183 } 184 mdn->assembled = 1; 185 return 0; 186 } 187 188 static int MatZeroEntries_MPIDense(Mat A) 189 { 190 Mat_MPIDense *l = (Mat_MPIDense *) A->data; 191 return MatZeroEntries(l->A); 192 } 193 194 /* the code does not do the diagonal entries correctly unless the 195 matrix is square and the column and row owerships are identical. 196 This is a BUG. The only way to fix it seems to be to access 197 mdn->A and mdn->B directly and not through the MatZeroRows() 198 routine. 199 */ 200 static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 201 { 202 Mat_MPIDense *l = (Mat_MPIDense *) A->data; 203 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 204 int *procs,*nprocs,j,found,idx,nsends,*work; 205 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 206 int *rvalues,tag = A->tag,count,base,slen,n,*source; 207 int *lens,imdex,*lrows,*values; 208 MPI_Comm comm = A->comm; 209 MPI_Request *send_waits,*recv_waits; 210 MPI_Status recv_status,*send_status; 211 IS istmp; 212 213 if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix"); 214 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 215 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 216 217 /* first count number of contributors to each processor */ 218 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 219 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 220 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 221 for ( i=0; i<N; i++ ) { 222 idx = rows[i]; 223 found = 0; 224 for ( j=0; j<size; j++ ) { 225 if (idx >= owners[j] && idx < owners[j+1]) { 226 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 227 } 228 } 229 if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range"); 230 } 231 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 232 233 /* inform other processors of number of messages and max length*/ 234 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 235 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 236 nrecvs = work[rank]; 237 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 238 nmax = work[rank]; 239 PetscFree(work); 240 241 /* post receives: */ 242 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 243 CHKPTRQ(rvalues); 244 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 245 CHKPTRQ(recv_waits); 246 for ( i=0; i<nrecvs; i++ ) { 247 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 248 } 249 250 /* do sends: 251 1) starts[i] gives the starting index in svalues for stuff going to 252 the ith processor 253 */ 254 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 255 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 256 CHKPTRQ(send_waits); 257 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 258 starts[0] = 0; 259 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 260 for ( i=0; i<N; i++ ) { 261 svalues[starts[owner[i]]++] = rows[i]; 262 } 263 ISRestoreIndices(is,&rows); 264 265 starts[0] = 0; 266 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 267 count = 0; 268 for ( i=0; i<size; i++ ) { 269 if (procs[i]) { 270 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 271 } 272 } 273 PetscFree(starts); 274 275 base = owners[rank]; 276 277 /* wait on receives */ 278 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 279 source = lens + nrecvs; 280 count = nrecvs; slen = 0; 281 while (count) { 282 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 283 /* unpack receives into our local space */ 284 MPI_Get_count(&recv_status,MPI_INT,&n); 285 source[imdex] = recv_status.MPI_SOURCE; 286 lens[imdex] = n; 287 slen += n; 288 count--; 289 } 290 PetscFree(recv_waits); 291 292 /* move the data into the send scatter */ 293 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 294 count = 0; 295 for ( i=0; i<nrecvs; i++ ) { 296 values = rvalues + i*nmax; 297 for ( j=0; j<lens[i]; j++ ) { 298 lrows[count++] = values[j] - base; 299 } 300 } 301 PetscFree(rvalues); PetscFree(lens); 302 PetscFree(owner); PetscFree(nprocs); 303 304 /* actually zap the local rows */ 305 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 306 PLogObjectParent(A,istmp); 307 PetscFree(lrows); 308 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 309 ierr = ISDestroy(istmp); CHKERRQ(ierr); 310 311 /* wait on sends */ 312 if (nsends) { 313 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 314 CHKPTRQ(send_status); 315 MPI_Waitall(nsends,send_waits,send_status); 316 PetscFree(send_status); 317 } 318 PetscFree(send_waits); PetscFree(svalues); 319 320 return 0; 321 } 322 323 static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 324 { 325 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 326 int ierr; 327 if (!mdn->assembled) 328 SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first"); 329 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 330 CHKERRQ(ierr); 331 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 332 CHKERRQ(ierr); 333 ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr); 334 return 0; 335 } 336 337 static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 338 { 339 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 340 int ierr; 341 if (!mdn->assembled) 342 SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first"); 343 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 344 CHKERRQ(ierr); 345 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); 346 CHKERRQ(ierr); 347 ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr); 348 return 0; 349 } 350 351 static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 354 int ierr; 355 Scalar zero = 0.0; 356 357 if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIDense:must assemble matrix"); 358 ierr = VecSet(&zero,yy); CHKERRQ(ierr); 359 ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 360 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 361 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 362 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 363 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 364 return 0; 365 } 366 367 static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 368 { 369 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 370 int ierr; 371 372 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIDense:must assemble matrix"); 373 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 374 ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr); 375 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 376 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 377 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 378 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 379 return 0; 380 } 381 382 static int MatGetDiagonal_MPIDense(Mat A,Vec v) 383 { 384 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 385 Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 386 int ierr, i, n, m = a->m, radd; 387 Scalar *x; 388 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIDense:must assemble matrix"); 389 ierr = VecGetArray(v,&x); CHKERRQ(ierr); 390 ierr = VecGetSize(v,&n); CHKERRQ(ierr); 391 if (n != a->M) SETERRQ(1,"MatGetDiagonal_SeqDense:Nonconforming mat and vec"); 392 radd = a->rstart*m*m; 393 for ( i=0; i<m; i++ ) { 394 x[i] = aloc->v[radd + i*m + i]; 395 } 396 return 0; 397 } 398 399 static int MatDestroy_MPIDense(PetscObject obj) 400 { 401 Mat mat = (Mat) obj; 402 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 403 int ierr; 404 #if defined(PETSC_LOG) 405 PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N); 406 #endif 407 PetscFree(mdn->rowners); 408 ierr = MatDestroy(mdn->A); CHKERRQ(ierr); 409 if (mdn->lvec) VecDestroy(mdn->lvec); 410 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 411 PetscFree(mdn); 412 PLogObjectDestroy(mat); 413 PetscHeaderDestroy(mat); 414 return 0; 415 } 416 417 #include "pinclude/pviewer.h" 418 419 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 420 { 421 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 422 int ierr; 423 if (mdn->size == 1) { 424 ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 425 } 426 else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported"); 427 return 0; 428 } 429 430 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 431 { 432 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 433 int ierr, format; 434 PetscObject vobj = (PetscObject) viewer; 435 FILE *fd; 436 437 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 438 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 439 ierr = ViewerFileGetFormat_Private(viewer,&format); 440 if (format == FILE_FORMAT_INFO_DETAILED) { 441 int nz, nzalloc, mem, rank; 442 MPI_Comm_rank(mat->comm,&rank); 443 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 444 MPIU_Seq_begin(mat->comm,1); 445 fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n", 446 rank,mdn->m,nz,nzalloc,mem); 447 fflush(fd); 448 MPIU_Seq_end(mat->comm,1); 449 ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr); 450 return 0; 451 } 452 else if (format == FILE_FORMAT_INFO) { 453 return 0; 454 } 455 } 456 if (vobj->type == ASCII_FILE_VIEWER) { 457 MPIU_Seq_begin(mat->comm,1); 458 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n", 459 mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n); 460 ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 461 fflush(fd); 462 MPIU_Seq_end(mat->comm,1); 463 } 464 else { 465 int size = mdn->size, rank = mdn->rank; 466 if (size == 1) { 467 ierr = MatView(mdn->A,viewer); CHKERRQ(ierr); 468 } 469 else { 470 /* assemble the entire matrix onto first processor. */ 471 Mat A; 472 int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 473 Scalar *vals; 474 Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 475 476 if (!rank) { 477 ierr = MatCreateMPIDense(mat->comm,M,M,N,N,&A); CHKERRQ(ierr); 478 } 479 else { 480 ierr = MatCreateMPIDense(mat->comm,0,M,N,N,&A); CHKERRQ(ierr); 481 } 482 PLogObjectParent(mat,A); 483 484 /* Copy the matrix ... This isn't the most efficient means, 485 but it's quick for now */ 486 row = mdn->rstart; m = Amdn->m; 487 for ( i=0; i<m; i++ ) { 488 ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 489 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr); 490 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr); 491 row++; 492 } 493 494 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 495 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 496 if (!rank) { 497 ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr); 498 } 499 ierr = MatDestroy(A); CHKERRQ(ierr); 500 } 501 } 502 return 0; 503 } 504 505 static int MatView_MPIDense(PetscObject obj,Viewer viewer) 506 { 507 Mat mat = (Mat) obj; 508 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 509 PetscObject vobj = (PetscObject) viewer; 510 int ierr; 511 512 if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix"); 513 if (!viewer) { 514 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 515 } 516 if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 517 ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 518 } 519 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 520 ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr); 521 } 522 else if (vobj->type == BINARY_FILE_VIEWER) { 523 return MatView_MPIDense_Binary(mat,viewer); 524 } 525 return 0; 526 } 527 528 static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz, 529 int *nzalloc,int *mem) 530 { 531 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 532 Mat mdn = mat->A; 533 int ierr, isend[3], irecv[3]; 534 535 ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 536 if (flag == MAT_LOCAL) { 537 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 538 } else if (flag == MAT_GLOBAL_MAX) { 539 MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm); 540 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 541 } else if (flag == MAT_GLOBAL_SUM) { 542 MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm); 543 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 544 } 545 return 0; 546 } 547 548 static int MatSetOption_MPIDense(Mat A,MatOption op) 549 { 550 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 551 552 if (op == NO_NEW_NONZERO_LOCATIONS || 553 op == YES_NEW_NONZERO_LOCATIONS || 554 op == COLUMNS_SORTED || 555 op == ROW_ORIENTED) { 556 MatSetOption(a->A,op); 557 } 558 else if (op == ROWS_SORTED || 559 op == SYMMETRIC_MATRIX || 560 op == STRUCTURALLY_SYMMETRIC_MATRIX || 561 op == YES_NEW_DIAGONALS) 562 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n"); 563 else if (op == COLUMN_ORIENTED) 564 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");} 565 else if (op == NO_NEW_DIAGONALS) 566 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");} 567 else 568 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");} 569 return 0; 570 } 571 572 static int MatGetSize_MPIDense(Mat A,int *m,int *n) 573 { 574 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 575 *m = mat->M; *n = mat->N; 576 return 0; 577 } 578 579 static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 580 { 581 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 582 *m = mat->m; *n = mat->N; 583 return 0; 584 } 585 586 static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 587 { 588 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 589 *m = mat->rstart; *n = mat->rend; 590 return 0; 591 } 592 593 static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 594 { 595 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 596 int lrow, rstart = mat->rstart, rend = mat->rend; 597 598 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows") 599 lrow = row - rstart; 600 return MatGetRow(mat->A,lrow,nz,idx,v); 601 } 602 603 static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 604 { 605 if (idx) PetscFree(*idx); 606 if (v) PetscFree(*v); 607 return 0; 608 } 609 610 static int MatNorm_MPIDense(Mat A,NormType type,double *norm) 611 { 612 Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 613 Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 614 int ierr, i, j; 615 double sum = 0.0; 616 Scalar *v = mat->v; 617 618 if (!mdn->assembled) SETERRQ(1,"MatNorm_MPIDense:Must assemble matrix"); 619 if (mdn->size == 1) { 620 ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr); 621 } else { 622 if (type == NORM_FROBENIUS) { 623 for (i=0; i<mat->n*mat->m; i++ ) { 624 #if defined(PETSC_COMPLEX) 625 sum += real(conj(*v)*(*v)); v++; 626 #else 627 sum += (*v)*(*v); v++; 628 #endif 629 } 630 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,A->comm); 631 *norm = sqrt(*norm); 632 PLogFlops(2*mat->n*mat->m); 633 } 634 else if (type == NORM_1) { 635 double *tmp, *tmp2; 636 tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp); 637 tmp2 = tmp + mdn->N; 638 PetscMemzero(tmp,2*mdn->N*sizeof(double)); 639 *norm = 0.0; 640 v = mat->v; 641 for ( j=0; j<mat->n; j++ ) { 642 for ( i=0; i<mat->m; i++ ) { 643 tmp[j] += PetscAbsScalar(*v++); 644 } 645 } 646 MPI_Allreduce((void*)tmp,(void*)tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm); 647 for ( j=0; j<mdn->N; j++ ) { 648 if (tmp2[j] > *norm) *norm = tmp2[j]; 649 } 650 PetscFree(tmp); 651 PLogFlops(mat->n*mat->m); 652 } 653 else if (type == NORM_INFINITY) { /* max row norm */ 654 double ntemp; 655 ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr); 656 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,A->comm); 657 } 658 else { 659 SETERRQ(1,"MatNorm_MPIDense:No support for two norm"); 660 } 661 } 662 return 0; 663 } 664 665 static int MatTranspose_MPIDense(Mat A,Mat *matout) 666 { 667 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 668 Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 669 Mat B; 670 int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 671 int j, i, ierr; 672 Scalar *v; 673 674 if (!matout && M != N) 675 SETERRQ(1,"MatTranspose_MPIDense:Supports square matrix only in-place"); 676 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B); CHKERRQ(ierr); 677 678 m = Aloc->m; n = Aloc->n; v = Aloc->v; 679 rwork = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(rwork); 680 for ( j=0; j<n; j++ ) { 681 for (i=0; i<m; i++) rwork[i] = rstart + i; 682 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr); 683 v += m; 684 } 685 PetscFree(rwork); 686 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 688 if (matout) { 689 *matout = B; 690 } else { 691 /* This isn't really an in-place transpose, but free data struct from a */ 692 PetscFree(a->rowners); 693 ierr = MatDestroy(a->A); CHKERRQ(ierr); 694 if (a->lvec) VecDestroy(a->lvec); 695 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 696 PetscFree(a); 697 PetscMemcpy(A,B,sizeof(struct _Mat)); 698 PetscHeaderDestroy(B); 699 } 700 return 0; 701 } 702 703 static int MatCopyPrivate_MPIDense(Mat,Mat *,int); 704 705 /* -------------------------------------------------------------------*/ 706 static struct _MatOps MatOps = {MatSetValues_MPIDense, 707 MatGetRow_MPIDense,MatRestoreRow_MPIDense, 708 MatMult_MPIDense,MatMultAdd_MPIDense, 709 MatMultTrans_MPIDense,MatMultTransAdd_MPIDense, 710 0,0, 711 0,0,0, 712 0,0,MatTranspose_MPIDense, 713 MatGetInfo_MPIDense,0, 714 MatGetDiagonal_MPIDense,0,MatNorm_MPIDense, 715 MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense, 716 0, 717 MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense, 718 0, 719 0,0,0,0, 720 MatGetSize_MPIDense,MatGetLocalSize_MPIDense, 721 MatGetOwnershipRange_MPIDense, 722 0,0, 723 0,0,0,0,0,MatCopyPrivate_MPIDense}; 724 725 /*@C 726 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 727 728 Input Parameters: 729 . comm - MPI communicator 730 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 731 . n - number of local columns (or PETSC_DECIDE to have calculated 732 if N is given) 733 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 734 . N - number of global columns (or PETSC_DECIDE to have calculated 735 if n is given) 736 737 Output Parameter: 738 . newmat - the matrix 739 740 Notes: 741 The dense format is fully compatible with standard Fortran 77 742 storage by columns. 743 744 The user MUST specify either the local or global matrix dimensions 745 (possibly both). 746 747 Currently, the only parallel dense matrix decomposition is by rows, 748 so that n=N and each submatrix owns all of the global columns. 749 750 .keywords: matrix, dense, parallel 751 752 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 753 @*/ 754 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat) 755 { 756 Mat mat; 757 Mat_MPIDense *a; 758 int ierr, i; 759 760 *newmat = 0; 761 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm); 762 PLogObjectCreate(mat); 763 mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 764 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 765 mat->destroy = MatDestroy_MPIDense; 766 mat->view = MatView_MPIDense; 767 mat->factor = 0; 768 769 a->insertmode = NOT_SET_VALUES; 770 MPI_Comm_rank(comm,&a->rank); 771 MPI_Comm_size(comm,&a->size); 772 773 if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm); 774 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 775 776 /* each row stores all columns */ 777 if (N == PETSC_DECIDE) N = n; 778 if (n == PETSC_DECIDE) n = N; 779 if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); 780 a->N = N; 781 a->M = M; 782 a->m = m; 783 a->n = n; 784 785 /* build local table of row and column ownerships */ 786 a->rowners = (int *) PetscMalloc((a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 787 PLogObjectMemory(mat,(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 788 sizeof(Mat_MPIDense)); 789 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 790 a->rowners[0] = 0; 791 for ( i=2; i<=a->size; i++ ) { 792 a->rowners[i] += a->rowners[i-1]; 793 } 794 a->rstart = a->rowners[a->rank]; 795 a->rend = a->rowners[a->rank+1]; 796 797 ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr); 798 PLogObjectParent(mat,a->A); 799 800 /* build cache for off array entries formed */ 801 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 802 803 /* stuff used for matrix vector multiply */ 804 a->lvec = 0; 805 a->Mvctx = 0; 806 a->assembled = 0; 807 808 *newmat = mat; 809 return 0; 810 } 811 812 static int MatCopyPrivate_MPIDense(Mat A,Mat *newmat,int cpvalues) 813 { 814 Mat mat; 815 Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 816 int ierr; 817 818 if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix"); 819 *newmat = 0; 820 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm); 821 PLogObjectCreate(mat); 822 mat->data = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a); 823 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 824 mat->destroy = MatDestroy_MPIDense; 825 mat->view = MatView_MPIDense; 826 mat->factor = A->factor; 827 828 a->m = oldmat->m; 829 a->n = oldmat->n; 830 a->M = oldmat->M; 831 a->N = oldmat->N; 832 833 a->assembled = 1; 834 a->rstart = oldmat->rstart; 835 a->rend = oldmat->rend; 836 a->size = oldmat->size; 837 a->rank = oldmat->rank; 838 a->insertmode = NOT_SET_VALUES; 839 840 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 841 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense)); 842 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 843 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 844 845 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 846 PLogObjectParent(mat,a->lvec); 847 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 848 PLogObjectParent(mat,a->Mvctx); 849 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 850 PLogObjectParent(mat,a->A); 851 *newmat = mat; 852 return 0; 853 } 854 855 #include "sysio.h" 856 857 int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat) 858 { 859 Mat A; 860 int i, nz, ierr, j,rstart, rend, fd; 861 Scalar *vals,*svals; 862 PetscObject vobj = (PetscObject) bview; 863 MPI_Comm comm = vobj->comm; 864 MPI_Status status; 865 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 866 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 867 int tag = ((PetscObject)bview)->tag; 868 869 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 870 if (!rank) { 871 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 872 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 873 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object"); 874 } 875 876 MPI_Bcast(header+1,3,MPI_INT,0,comm); 877 M = header[1]; N = header[2]; 878 /* determine ownership of all rows */ 879 m = M/size + ((M % size) > rank); 880 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 881 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 882 rowners[0] = 0; 883 for ( i=2; i<=size; i++ ) { 884 rowners[i] += rowners[i-1]; 885 } 886 rstart = rowners[rank]; 887 rend = rowners[rank+1]; 888 889 /* distribute row lengths to all processors */ 890 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 891 offlens = ourlens + (rend-rstart); 892 if (!rank) { 893 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 894 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 895 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 896 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 897 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 898 PetscFree(sndcounts); 899 } 900 else { 901 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 902 } 903 904 if (!rank) { 905 /* calculate the number of nonzeros on each processor */ 906 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 907 PetscMemzero(procsnz,size*sizeof(int)); 908 for ( i=0; i<size; i++ ) { 909 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 910 procsnz[i] += rowlengths[j]; 911 } 912 } 913 PetscFree(rowlengths); 914 915 /* determine max buffer needed and allocate it */ 916 maxnz = 0; 917 for ( i=0; i<size; i++ ) { 918 maxnz = PetscMax(maxnz,procsnz[i]); 919 } 920 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 921 922 /* read in my part of the matrix column indices */ 923 nz = procsnz[0]; 924 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 925 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 926 927 /* read in every one elses and ship off */ 928 for ( i=1; i<size; i++ ) { 929 nz = procsnz[i]; 930 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 931 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 932 } 933 PetscFree(cols); 934 } 935 else { 936 /* determine buffer space needed for message */ 937 nz = 0; 938 for ( i=0; i<m; i++ ) { 939 nz += ourlens[i]; 940 } 941 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 942 943 /* receive message of column indices*/ 944 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 945 MPI_Get_count(&status,MPI_INT,&maxnz); 946 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 947 } 948 949 /* loop over local rows, determining number of off diagonal entries */ 950 PetscMemzero(offlens,m*sizeof(int)); 951 jj = 0; 952 for ( i=0; i<m; i++ ) { 953 for ( j=0; j<ourlens[i]; j++ ) { 954 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 955 jj++; 956 } 957 } 958 959 /* create our matrix */ 960 for ( i=0; i<m; i++ ) { 961 ourlens[i] -= offlens[i]; 962 } 963 if (type == MATMPIDENSE) { 964 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr); 965 } 966 A = *newmat; 967 for ( i=0; i<m; i++ ) { 968 ourlens[i] += offlens[i]; 969 } 970 971 if (!rank) { 972 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 973 974 /* read in my part of the matrix numerical values */ 975 nz = procsnz[0]; 976 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 977 978 /* insert into matrix */ 979 jj = rstart; 980 smycols = mycols; 981 svals = vals; 982 for ( i=0; i<m; i++ ) { 983 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 984 smycols += ourlens[i]; 985 svals += ourlens[i]; 986 jj++; 987 } 988 989 /* read in other processors and ship out */ 990 for ( i=1; i<size; i++ ) { 991 nz = procsnz[i]; 992 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 993 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 994 } 995 PetscFree(procsnz); 996 } 997 else { 998 /* receive numeric values */ 999 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1000 1001 /* receive message of values*/ 1002 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1003 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1004 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file"); 1005 1006 /* insert into matrix */ 1007 jj = rstart; 1008 smycols = mycols; 1009 svals = vals; 1010 for ( i=0; i<m; i++ ) { 1011 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1012 smycols += ourlens[i]; 1013 svals += ourlens[i]; 1014 jj++; 1015 } 1016 } 1017 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1018 1019 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1020 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1021 return 0; 1022 } 1023