#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.210 1997/07/16 22:00:13 balay Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatGetRowIJ_MPIAIJ"
int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                       PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                           PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                len*sizeof(int)); \
PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 103 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 104 len*sizeof(Scalar)); \ 105 /* free up old matrix storage */ \ 106 \ 107 PetscFree(a->a); \ 108 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 109 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 110 a->singlemalloc = 1; \ 111 \ 112 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 113 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 114 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 115 a->maxnz += CHUNKSIZE; \ 116 a->reallocs++; \ 117 } \ 118 N = nrow++ - 1; a->nz++; \ 119 /* shift up all the later entries in this row */ \ 120 for ( ii=N; ii>=_i; ii-- ) { \ 121 rp[ii+1] = rp[ii]; \ 122 ap[ii+1] = ap[ii]; \ 123 } \ 124 rp[_i] = col1; \ 125 ap[_i] = value; \ 126 a_noinsert: ; \ 127 ailen[row] = nrow; \ 128 } 129 130 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 131 { \ 132 \ 133 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 134 rmax = bimax[row]; nrow = bilen[row]; \ 135 col1 = col - shift; \ 136 \ 137 low = 0; high = nrow; \ 138 while (high-low > 5) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > col) high = t; \ 141 else low = t; \ 142 } \ 143 for ( _i=0; _i<nrow; _i++ ) { \ 144 if (rp[_i] > col1) break; \ 145 if (rp[_i] == col1) { \ 146 if (addv == ADD_VALUES) ap[_i] += value; \ 147 else ap[_i] = value; \ 148 goto b_noinsert; \ 149 } \ 150 } \ 151 if (nonew == 1) goto b_noinsert; \ 152 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 156 Scalar *new_a; \ 157 \ 158 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 162 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163 new_j = (int *) (new_a + new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 168 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 170 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 171 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 172 len*sizeof(int)); \ 173 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 174 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 175 len*sizeof(Scalar)); \ 176 /* free up old matrix storage */ \ 177 \ 178 PetscFree(b->a); \ 179 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 180 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 181 b->singlemalloc = 1; \ 182 \ 183 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 184 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 185 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 186 b->maxnz += CHUNKSIZE; \ 187 b->reallocs++; \ 188 } \ 189 N = nrow++ - 1; b->nz++; \ 190 /* shift up all the later entries in this row */ \ 191 for ( ii=N; ii>=_i; ii-- ) { \ 192 rp[ii+1] = rp[ii]; \ 193 ap[ii+1] = ap[ii]; \ 194 } \ 195 rp[_i] = col1; \ 196 ap[_i] = value; \ 197 b_noinsert: ; \ 198 bilen[row] = nrow; \ 199 } 200 201 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 202 #undef __FUNC__ 203 #define __FUNC__ "MatSetValues_MPIAIJ" 204 int 
MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 205 { 206 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 207 Scalar value; 208 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 209 int cstart = aij->cstart, cend = aij->cend,row,col; 210 int roworiented = aij->roworiented; 211 212 /* Some Variables required in the macro */ 213 Mat A = aij->A; 214 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 215 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 216 Scalar *aa = a->a; 217 218 Mat B = aij->B; 219 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 220 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 221 Scalar *ba = b->a; 222 223 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 224 int nonew = a->nonew,shift = a->indexshift; 225 Scalar *ap; 226 227 for ( i=0; i<m; i++ ) { 228 #if defined(PETSC_BOPT_g) 229 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 230 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 231 #endif 232 if (im[i] >= rstart && im[i] < rend) { 233 row = im[i] - rstart; 234 for ( j=0; j<n; j++ ) { 235 if (in[j] >= cstart && in[j] < cend){ 236 col = in[j] - cstart; 237 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 238 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 239 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 240 } 241 #if defined(PETSC_BOPT_g) 242 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 243 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 244 #endif 245 else { 246 if (mat->was_assembled) { 247 if (!aij->colmap) { 248 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 249 } 250 col = aij->colmap[in[j]] - 1; 251 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 252 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 253 col = in[j]; 254 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 255 B = aij->B; 256 b = (Mat_SeqAIJ *) B->data; 257 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 258 ba = b->a; 259 } 260 } 261 else col = in[j]; 262 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 263 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 264 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 265 } 266 } 267 } 268 else { 269 if (roworiented && !aij->donotstash) { 270 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 271 } 272 else { 273 if (!aij->donotstash) { 274 row = im[i]; 275 for ( j=0; j<n; j++ ) { 276 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 277 } 278 } 279 } 280 } 281 } 282 return 0; 283 } 284 285 #undef __FUNC__ 286 #define __FUNC__ "MatGetValues_MPIAIJ" 287 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 290 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 291 int cstart = aij->cstart, cend = aij->cend,row,col; 292 293 for ( i=0; i<m; i++ ) { 294 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 295 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 296 if (idxm[i] >= rstart && idxm[i] < rend) { 297 row = idxm[i] - rstart; 298 for ( j=0; j<n; j++ ) { 299 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 300 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 301 if (idxn[j] >= cstart && idxn[j] < cend){ 302 col = idxn[j] - cstart; 303 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 304 } 305 else { 306 if (!aij->colmap) { 307 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 308 } 309 col = 
aij->colmap[idxn[j]] - 1; 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 313 } 314 } 315 } 316 } 317 else { 318 SETERRQ(1,0,"Only local values currently supported"); 319 } 320 } 321 return 0; 322 } 323 324 #undef __FUNC__ 325 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 326 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 327 { 328 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 329 MPI_Comm comm = mat->comm; 330 int size = aij->size, *owners = aij->rowners; 331 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 332 MPI_Request *send_waits,*recv_waits; 333 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 334 InsertMode addv; 335 Scalar *rvalues,*svalues; 336 337 /* make sure all processors are either in INSERTMODE or ADDMODE */ 338 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 339 if (addv == (ADD_VALUES|INSERT_VALUES)) { 340 SETERRQ(1,0,"Some processors inserted others added"); 341 } 342 mat->insertmode = addv; /* in case this processor had no cache */ 343 344 /* first count number of contributors to each processor */ 345 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 346 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 347 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 348 for ( i=0; i<aij->stash.n; i++ ) { 349 idx = aij->stash.idx[i]; 350 for ( j=0; j<size; j++ ) { 351 if (idx >= owners[j] && idx < owners[j+1]) { 352 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 353 } 354 } 355 } 356 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 357 358 /* inform other processors of number of messages and max length*/ 359 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 360 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 361 nreceives = work[rank]; 362 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 363 nmax = work[rank]; 364 PetscFree(work); 365 366 /* post receives: 367 1) each message will consist of ordered pairs 368 (global index,value) we store the global index as a double 369 to simplify the message passing. 370 2) since we don't know how long each individual message is we 371 allocate the largest needed buffer for each receive. Potentially 372 this is a lot of wasted space. 373 374 375 This could be done better. 
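         Each stashed entry travels as a triple of Scalars
         (global row index, global column index, numerical value), so a
         message carrying k entries occupies 3*k Scalars; see the packing of
         svalues below and the matching unpacking in MatAssemblyEnd_MPIAIJ().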
376 */ 377 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 378 CHKPTRQ(rvalues); 379 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 380 CHKPTRQ(recv_waits); 381 for ( i=0; i<nreceives; i++ ) { 382 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 383 comm,recv_waits+i); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 391 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 392 CHKPTRQ(send_waits); 393 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 394 starts[0] = 0; 395 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 396 for ( i=0; i<aij->stash.n; i++ ) { 397 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 398 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 399 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 400 } 401 PetscFree(owner); 402 starts[0] = 0; 403 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 404 count = 0; 405 for ( i=0; i<size; i++ ) { 406 if (procs[i]) { 407 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 408 comm,send_waits+count++); 409 } 410 } 411 PetscFree(starts); PetscFree(nprocs); 412 413 /* Free cache space */ 414 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 415 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 416 417 aij->svalues = svalues; aij->rvalues = rvalues; 418 aij->nsends = nsends; aij->nrecvs = nreceives; 419 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 420 aij->rmax = nmax; 421 422 return 0; 423 } 424 extern int MatSetUpMultiply_MPIAIJ(Mat); 425 426 #undef __FUNC__ 427 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 428 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 429 { 430 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 431 MPI_Status *send_status,recv_status; 432 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 433 int row,col,other_disassembled; 434 Scalar *values,val; 435 InsertMode addv = mat->insertmode; 436 437 /* wait on receives */ 438 while (count) { 439 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 440 /* unpack receives into our local space */ 441 values = aij->rvalues + 3*imdex*aij->rmax; 442 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 443 n = n/3; 444 for ( i=0; i<n; i++ ) { 445 row = (int) PetscReal(values[3*i]) - aij->rstart; 446 col = (int) PetscReal(values[3*i+1]); 447 val = values[3*i+2]; 448 if (col >= aij->cstart && col < aij->cend) { 449 col -= aij->cstart; 450 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 451 } 452 else { 453 if (mat->was_assembled || mat->assembled) { 454 if (!aij->colmap) { 455 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 456 } 457 col = aij->colmap[col] - 1; 458 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 col = (int) PetscReal(values[3*i+1]); 461 } 462 } 463 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 464 } 465 } 466 count--; 467 } 468 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 469 470 /* wait on sends */ 471 if (aij->nsends) { 472 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 473 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 474 PetscFree(send_status); 475 } 476 
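  /* all outstanding sends have completed (or there were none), so the send
     buffer and request arrays allocated in MatAssemblyBegin_MPIAIJ() can be freed */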
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (int *) PetscMalloc(
(N+1)*sizeof(int) ); CHKPTRQ(svalues); 571 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 572 CHKPTRQ(send_waits); 573 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 574 starts[0] = 0; 575 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 576 for ( i=0; i<N; i++ ) { 577 svalues[starts[owner[i]]++] = rows[i]; 578 } 579 ISRestoreIndices(is,&rows); 580 581 starts[0] = 0; 582 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 583 count = 0; 584 for ( i=0; i<size; i++ ) { 585 if (procs[i]) { 586 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 587 } 588 } 589 PetscFree(starts); 590 591 base = owners[rank]; 592 593 /* wait on receives */ 594 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 595 source = lens + nrecvs; 596 count = nrecvs; slen = 0; 597 while (count) { 598 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 599 /* unpack receives into our local space */ 600 MPI_Get_count(&recv_status,MPI_INT,&n); 601 source[imdex] = recv_status.MPI_SOURCE; 602 lens[imdex] = n; 603 slen += n; 604 count--; 605 } 606 PetscFree(recv_waits); 607 608 /* move the data into the send scatter */ 609 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 610 count = 0; 611 for ( i=0; i<nrecvs; i++ ) { 612 values = rvalues + i*nmax; 613 for ( j=0; j<lens[i]; j++ ) { 614 lrows[count++] = values[j] - base; 615 } 616 } 617 PetscFree(rvalues); PetscFree(lens); 618 PetscFree(owner); PetscFree(nprocs); 619 620 /* actually zap the local rows */ 621 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 622 PLogObjectParent(A,istmp); 623 PetscFree(lrows); 624 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 625 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 626 ierr = ISDestroy(istmp); CHKERRQ(ierr); 627 628 /* wait on sends */ 629 if (nsends) { 630 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 631 CHKPTRQ(send_status); 632 MPI_Waitall(nsends,send_waits,send_status); 633 PetscFree(send_status); 634 } 635 PetscFree(send_waits); PetscFree(svalues); 636 637 return 0; 638 } 639 640 #undef __FUNC__ 641 #define __FUNC__ "MatMult_MPIAIJ" 642 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 643 { 644 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 645 int ierr,nt; 646 647 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 648 if (nt != a->n) { 649 SETERRQ(1,0,"Incompatible partition of A and xx"); 650 } 651 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 652 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 653 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 654 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 655 return 0; 656 } 657 658 #undef __FUNC__ 659 #define __FUNC__ "MatMultAdd_MPIAIJ" 660 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 661 { 662 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 663 int ierr; 664 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 665 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 666 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 667 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 668 return 0; 669 } 670 671 #undef __FUNC__ 672 #define __FUNC__ "MatMultTrans_MPIAIJ" 673 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 674 { 675 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 676 int ierr; 677 678 /* do nondiagonal part */ 679 
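  /* B^T x yields the contributions this process owes to rows owned by other
     processes; they are accumulated in lvec and added into yy by the reverse
     scatter below, overlapping that communication with the local product A^T x */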
ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 680 /* send it on its way */ 681 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682 /* do local part */ 683 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 684 /* receive remote parts: note this assumes the values are not actually */ 685 /* inserted in yy until the next line, which is true for my implementation*/ 686 /* but is not perhaps always true. */ 687 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 688 return 0; 689 } 690 691 #undef __FUNC__ 692 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 693 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 696 int ierr; 697 698 /* do nondiagonal part */ 699 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 700 /* send it on its way */ 701 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 702 /* do local part */ 703 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 704 /* receive remote parts: note this assumes the values are not actually */ 705 /* inserted in yy until the next line, which is true for my implementation*/ 706 /* but is not perhaps always true. */ 707 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 708 return 0; 709 } 710 711 /* 712 This only works correctly for square matrices where the subblock A->A is the 713 diagonal block 714 */ 715 #undef __FUNC__ 716 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 717 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 720 if (a->M != a->N) 721 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 722 if (a->rstart != a->cstart || a->rend != a->cend) { 723 SETERRQ(1,0,"row partition must equal col partition"); } 724 return MatGetDiagonal(a->A,v); 725 } 726 727 #undef __FUNC__ 728 #define __FUNC__ "MatScale_MPIAIJ" 729 int MatScale_MPIAIJ(Scalar *aa,Mat A) 730 { 731 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 732 int ierr; 733 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 734 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 735 return 0; 736 } 737 738 #undef __FUNC__ 739 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 740 int MatDestroy_MPIAIJ(PetscObject obj) 741 { 742 Mat mat = (Mat) obj; 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 744 int ierr; 745 746 #if defined(PETSC_LOG) 747 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 748 #endif 749 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 750 PetscFree(aij->rowners); 751 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 752 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 753 if (aij->colmap) PetscFree(aij->colmap); 754 if (aij->garray) PetscFree(aij->garray); 755 if (aij->lvec) VecDestroy(aij->lvec); 756 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 757 if (aij->rowvalues) PetscFree(aij->rowvalues); 758 PetscFree(aij); 759 if (mat->mapping) { 760 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 761 } 762 PLogObjectDestroy(mat); 763 PetscHeaderDestroy(mat); 764 return 0; 765 } 766 767 #undef __FUNC__ 768 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 769 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 770 { 771 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 772 int ierr; 773 774 if (aij->size == 1) { 775 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 776 } 777 else SETERRQ(1,0,"Only uniprocessor output supported"); 778 return 0; 779 } 780 781 #undef __FUNC__ 782 #define 
__FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 783 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 784 { 785 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 786 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 787 int ierr, format,shift = C->indexshift,rank; 788 FILE *fd; 789 ViewerType vtype; 790 791 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 792 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 793 ierr = ViewerGetFormat(viewer,&format); 794 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 795 MatInfo info; 796 int flg; 797 MPI_Comm_rank(mat->comm,&rank); 798 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 799 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 800 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 801 PetscSequentialPhaseBegin(mat->comm,1); 802 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 803 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 804 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 805 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 806 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 807 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 808 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 809 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 810 fflush(fd); 811 PetscSequentialPhaseEnd(mat->comm,1); 812 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 813 return 0; 814 } 815 else if (format == VIEWER_FORMAT_ASCII_INFO) { 816 return 0; 817 } 818 } 819 820 if (vtype == DRAW_VIEWER) { 821 Draw draw; 822 PetscTruth isnull; 823 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 824 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 825 } 826 827 if (vtype == ASCII_FILE_VIEWER) { 828 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 829 PetscSequentialPhaseBegin(mat->comm,1); 830 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 831 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 832 aij->cend); 833 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 834 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 835 fflush(fd); 836 PetscSequentialPhaseEnd(mat->comm,1); 837 } 838 else { 839 int size = aij->size; 840 rank = aij->rank; 841 if (size == 1) { 842 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 843 } 844 else { 845 /* assemble the entire matrix onto first processor. 
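         This is done by creating a second parallel matrix with all M rows on
         process 0, copying the local diagonal (A) and off-diagonal (B) blocks
         into it using global column numbers, assembling it, and then viewing
         process 0's sequential diagonal block.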
      */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
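    Currently only the processor-local sweeps (the SOR_LOCAL_* flags) are
    implemented below; the off-processor values held in lvec are refreshed
    once per call by a forward scatter, not once per inner iteration.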
920 */ 921 #undef __FUNC__ 922 #define __FUNC__ "MatRelax_MPIAIJ" 923 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 924 double fshift,int its,Vec xx) 925 { 926 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 927 Mat AA = mat->A, BB = mat->B; 928 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 929 Scalar *b,*x,*xs,*ls,d,*v,sum; 930 int ierr,*idx, *diag; 931 int n = mat->n, m = mat->m, i,shift = A->indexshift; 932 933 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 934 xs = x + shift; /* shift by one for index start of 1 */ 935 ls = ls + shift; 936 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 937 diag = A->diag; 938 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 939 if (flag & SOR_ZERO_INITIAL_GUESS) { 940 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 941 } 942 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 943 CHKERRQ(ierr); 944 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 945 CHKERRQ(ierr); 946 while (its--) { 947 /* go down through the rows */ 948 for ( i=0; i<m; i++ ) { 949 n = A->i[i+1] - A->i[i]; 950 idx = A->j + A->i[i] + shift; 951 v = A->a + A->i[i] + shift; 952 sum = b[i]; 953 SPARSEDENSEMDOT(sum,xs,v,idx,n); 954 d = fshift + A->a[diag[i]+shift]; 955 n = B->i[i+1] - B->i[i]; 956 idx = B->j + B->i[i] + shift; 957 v = B->a + B->i[i] + shift; 958 SPARSEDENSEMDOT(sum,ls,v,idx,n); 959 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 960 } 961 /* come up through the rows */ 962 for ( i=m-1; i>-1; i-- ) { 963 n = A->i[i+1] - A->i[i]; 964 idx = A->j + A->i[i] + shift; 965 v = A->a + A->i[i] + shift; 966 sum = b[i]; 967 SPARSEDENSEMDOT(sum,xs,v,idx,n); 968 d = fshift + A->a[diag[i]+shift]; 969 n = B->i[i+1] - B->i[i]; 970 idx = B->j + B->i[i] + shift; 971 v = B->a + B->i[i] + shift; 972 SPARSEDENSEMDOT(sum,ls,v,idx,n); 973 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 974 } 975 } 976 } 977 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 978 if (flag & SOR_ZERO_INITIAL_GUESS) { 979 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 980 } 981 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 982 CHKERRQ(ierr); 983 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 984 CHKERRQ(ierr); 985 while (its--) { 986 for ( i=0; i<m; i++ ) { 987 n = A->i[i+1] - A->i[i]; 988 idx = A->j + A->i[i] + shift; 989 v = A->a + A->i[i] + shift; 990 sum = b[i]; 991 SPARSEDENSEMDOT(sum,xs,v,idx,n); 992 d = fshift + A->a[diag[i]+shift]; 993 n = B->i[i+1] - B->i[i]; 994 idx = B->j + B->i[i] + shift; 995 v = B->a + B->i[i] + shift; 996 SPARSEDENSEMDOT(sum,ls,v,idx,n); 997 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 998 } 999 } 1000 } 1001 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1002 if (flag & SOR_ZERO_INITIAL_GUESS) { 1003 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 1004 } 1005 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1006 mat->Mvctx); CHKERRQ(ierr); 1007 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1008 mat->Mvctx); CHKERRQ(ierr); 1009 while (its--) { 1010 for ( i=m-1; i>-1; i-- ) { 1011 n = A->i[i+1] - A->i[i]; 1012 idx = A->j + A->i[i] + shift; 1013 v = A->a + A->i[i] + shift; 1014 sum = b[i]; 1015 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1016 d = fshift + A->a[diag[i]+shift]; 1017 n = B->i[i+1] - B->i[i]; 1018 idx = B->j + B->i[i] + shift; 1019 v = B->a + B->i[i] + shift; 1020 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1021 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1022 } 1023 } 1024 } 1025 else { 1026 SETERRQ(1,0,"Option not supported"); 1027 } 1028 return 0; 1029 } 1030 1031 #undef __FUNC__ 1032 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 1033 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1034 { 1035 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1036 Mat A = mat->A, B = mat->B; 1037 int ierr; 1038 double isend[5], irecv[5]; 1039 1040 info->rows_global = (double)mat->M; 1041 info->columns_global = (double)mat->N; 1042 info->rows_local = (double)mat->m; 1043 info->columns_local = (double)mat->N; 1044 info->block_size = 1.0; 1045 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1046 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1047 isend[3] = info->memory; isend[4] = info->mallocs; 1048 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1049 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1050 isend[3] += info->memory; isend[4] += info->mallocs; 1051 if (flag == MAT_LOCAL) { 1052 info->nz_used = isend[0]; 1053 info->nz_allocated = isend[1]; 1054 info->nz_unneeded = isend[2]; 1055 info->memory = isend[3]; 1056 info->mallocs = isend[4]; 1057 } else if (flag == MAT_GLOBAL_MAX) { 1058 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 1059 info->nz_used = irecv[0]; 1060 info->nz_allocated = irecv[1]; 1061 info->nz_unneeded = irecv[2]; 1062 info->memory = irecv[3]; 1063 info->mallocs = irecv[4]; 1064 } else if (flag == MAT_GLOBAL_SUM) { 1065 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 1066 info->nz_used = irecv[0]; 1067 info->nz_allocated = irecv[1]; 1068 info->nz_unneeded = irecv[2]; 1069 info->memory = irecv[3]; 1070 info->mallocs = irecv[4]; 1071 } 1072 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1073 info->fill_ratio_needed = 0; 1074 info->factor_mallocs = 0; 1075 1076 return 0; 1077 } 1078 1079 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1080 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1081 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1082 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1083 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1084 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1085 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1086 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1087 1088 #undef __FUNC__ 1089 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 1090 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1093 1094 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1095 op == 
MAT_YES_NEW_NONZERO_LOCATIONS || 1096 op == MAT_COLUMNS_UNSORTED || 1097 op == MAT_COLUMNS_SORTED || 1098 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1099 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1100 MatSetOption(a->A,op); 1101 MatSetOption(a->B,op); 1102 } else if (op == MAT_ROW_ORIENTED) { 1103 a->roworiented = 1; 1104 MatSetOption(a->A,op); 1105 MatSetOption(a->B,op); 1106 } else if (op == MAT_ROWS_SORTED || 1107 op == MAT_ROWS_UNSORTED || 1108 op == MAT_SYMMETRIC || 1109 op == MAT_STRUCTURALLY_SYMMETRIC || 1110 op == MAT_YES_NEW_DIAGONALS) 1111 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1112 else if (op == MAT_COLUMN_ORIENTED) { 1113 a->roworiented = 0; 1114 MatSetOption(a->A,op); 1115 MatSetOption(a->B,op); 1116 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1117 a->donotstash = 1; 1118 } else if (op == MAT_NO_NEW_DIAGONALS) 1119 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1120 else 1121 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1122 return 0; 1123 } 1124 1125 #undef __FUNC__ 1126 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 1127 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1128 { 1129 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1130 *m = mat->M; *n = mat->N; 1131 return 0; 1132 } 1133 1134 #undef __FUNC__ 1135 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 1136 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1137 { 1138 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1139 *m = mat->m; *n = mat->N; 1140 return 0; 1141 } 1142 1143 #undef __FUNC__ 1144 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 1145 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1146 { 1147 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1148 *m = mat->rstart; *n = mat->rend; 1149 return 0; 1150 } 1151 1152 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1153 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1154 1155 #undef __FUNC__ 1156 #define __FUNC__ "MatGetRow_MPIAIJ" 1157 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1158 { 1159 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1160 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1161 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1162 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1163 int *cmap, *idx_p; 1164 1165 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1166 mat->getrowactive = PETSC_TRUE; 1167 1168 if (!mat->rowvalues && (idx || v)) { 1169 /* 1170 allocate enough space to hold information from the longest row. 
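       The length used is the maximum over the local rows of the combined
       nonzero count of the diagonal (A) and off-diagonal (B) blocks; a single
       allocation holds both the Scalar values and the int column indices.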
1171 */ 1172 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1173 int max = 1,m = mat->m,tmp; 1174 for ( i=0; i<m; i++ ) { 1175 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1176 if (max < tmp) { max = tmp; } 1177 } 1178 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1179 CHKPTRQ(mat->rowvalues); 1180 mat->rowindices = (int *) (mat->rowvalues + max); 1181 } 1182 1183 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1184 lrow = row - rstart; 1185 1186 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1187 if (!v) {pvA = 0; pvB = 0;} 1188 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1189 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1190 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1191 nztot = nzA + nzB; 1192 1193 cmap = mat->garray; 1194 if (v || idx) { 1195 if (nztot) { 1196 /* Sort by increasing column numbers, assuming A and B already sorted */ 1197 int imark = -1; 1198 if (v) { 1199 *v = v_p = mat->rowvalues; 1200 for ( i=0; i<nzB; i++ ) { 1201 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1202 else break; 1203 } 1204 imark = i; 1205 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1206 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1207 } 1208 if (idx) { 1209 *idx = idx_p = mat->rowindices; 1210 if (imark > -1) { 1211 for ( i=0; i<imark; i++ ) { 1212 idx_p[i] = cmap[cworkB[i]]; 1213 } 1214 } else { 1215 for ( i=0; i<nzB; i++ ) { 1216 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1217 else break; 1218 } 1219 imark = i; 1220 } 1221 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1222 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1223 } 1224 } 1225 else { 1226 if (idx) *idx = 0; 1227 if (v) *v = 0; 1228 } 1229 } 1230 *nz = nztot; 1231 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1232 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1233 return 0; 1234 } 1235 1236 #undef __FUNC__ 1237 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 1238 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1241 if (aij->getrowactive == PETSC_FALSE) { 1242 SETERRQ(1,0,"MatGetRow not called"); 1243 } 1244 aij->getrowactive = PETSC_FALSE; 1245 return 0; 1246 } 1247 1248 #undef __FUNC__ 1249 #define __FUNC__ "MatNorm_MPIAIJ" 1250 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1251 { 1252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1253 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1254 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1255 double sum = 0.0; 1256 Scalar *v; 1257 1258 if (aij->size == 1) { 1259 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1260 } else { 1261 if (type == NORM_FROBENIUS) { 1262 v = amat->a; 1263 for (i=0; i<amat->nz; i++ ) { 1264 #if defined(PETSC_COMPLEX) 1265 sum += real(conj(*v)*(*v)); v++; 1266 #else 1267 sum += (*v)*(*v); v++; 1268 #endif 1269 } 1270 v = bmat->a; 1271 for (i=0; i<bmat->nz; i++ ) { 1272 #if defined(PETSC_COMPLEX) 1273 sum += real(conj(*v)*(*v)); v++; 1274 #else 1275 sum += (*v)*(*v); v++; 1276 #endif 1277 } 1278 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1279 *norm = sqrt(*norm); 1280 } 1281 else if (type == NORM_1) { /* max column norm */ 1282 double *tmp, *tmp2; 1283 int *jj, *garray = aij->garray; 1284 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); 
CHKPTRQ(tmp); 1285 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1286 PetscMemzero(tmp,aij->N*sizeof(double)); 1287 *norm = 0.0; 1288 v = amat->a; jj = amat->j; 1289 for ( j=0; j<amat->nz; j++ ) { 1290 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1291 } 1292 v = bmat->a; jj = bmat->j; 1293 for ( j=0; j<bmat->nz; j++ ) { 1294 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1295 } 1296 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1297 for ( j=0; j<aij->N; j++ ) { 1298 if (tmp2[j] > *norm) *norm = tmp2[j]; 1299 } 1300 PetscFree(tmp); PetscFree(tmp2); 1301 } 1302 else if (type == NORM_INFINITY) { /* max row norm */ 1303 double ntemp = 0.0; 1304 for ( j=0; j<amat->m; j++ ) { 1305 v = amat->a + amat->i[j] + shift; 1306 sum = 0.0; 1307 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1308 sum += PetscAbsScalar(*v); v++; 1309 } 1310 v = bmat->a + bmat->i[j] + shift; 1311 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1312 sum += PetscAbsScalar(*v); v++; 1313 } 1314 if (sum > ntemp) ntemp = sum; 1315 } 1316 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1317 } 1318 else { 1319 SETERRQ(1,0,"No support for two norm"); 1320 } 1321 } 1322 return 0; 1323 } 1324 1325 #undef __FUNC__ 1326 #define __FUNC__ "MatTranspose_MPIAIJ" 1327 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1328 { 1329 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1330 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1331 int ierr,shift = Aloc->indexshift; 1332 Mat B; 1333 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1334 Scalar *array; 1335 1336 if (matout == PETSC_NULL && M != N) 1337 SETERRQ(1,0,"Square matrix only for in-place"); 1338 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1339 PETSC_NULL,&B); CHKERRQ(ierr); 1340 1341 /* copy over the A part */ 1342 Aloc = (Mat_SeqAIJ*) a->A->data; 1343 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1344 row = a->rstart; 1345 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1346 for ( i=0; i<m; i++ ) { 1347 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1348 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1349 } 1350 aj = Aloc->j; 1351 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1352 1353 /* copy over the B part */ 1354 Aloc = (Mat_SeqAIJ*) a->B->data; 1355 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1356 row = a->rstart; 1357 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1358 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1359 for ( i=0; i<m; i++ ) { 1360 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1361 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1362 } 1363 PetscFree(ct); 1364 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1365 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1366 if (matout != PETSC_NULL) { 1367 *matout = B; 1368 } else { 1369 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1370 PetscFree(a->rowners); 1371 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1372 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1373 if (a->colmap) PetscFree(a->colmap); 1374 if (a->garray) PetscFree(a->garray); 1375 if (a->lvec) VecDestroy(a->lvec); 1376 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1377 PetscFree(a); 1378 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1379 PetscHeaderDestroy(B); 1380 } 1381 return 0; 1382 } 1383 1384 #undef __FUNC__ 1385 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1387 { 1388 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1389 Mat a = aij->A, b = aij->B; 1390 int ierr,s1,s2,s3; 1391 1392 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1393 if (rr) { 1394 s3 = aij->n; 1395 VecGetLocalSize_Fast(rr,s1); 1396 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1397 /* Overlap communication with computation. */ 1398 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1399 } 1400 if (ll) { 1401 VecGetLocalSize_Fast(ll,s1); 1402 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1403 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1404 } 1405 /* scale the diagonal block */ 1406 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1407 1408 if (rr) { 1409 /* Do a scatter end and then right scale the off-diagonal block */ 1410 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1411 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1412 } 1413 1414 return 0; 1415 } 1416 1417 1418 extern int MatPrintHelp_SeqAIJ(Mat); 1419 #undef __FUNC__ 1420 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 1421 int MatPrintHelp_MPIAIJ(Mat A) 1422 { 1423 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1424 1425 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1426 else return 0; 1427 } 1428 1429 #undef __FUNC__ 1430 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 1431 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1432 { 1433 *bs = 1; 1434 return 0; 1435 } 1436 #undef __FUNC__ 1437 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 1438 int MatSetUnfactored_MPIAIJ(Mat A) 1439 { 1440 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1441 int ierr; 1442 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1443 return 0; 1444 } 1445 1446 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1447 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1448 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1449 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1450 /* -------------------------------------------------------------------*/ 1451 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1452 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1453 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1454 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1455 0,0, 1456 0,0, 1457 0,0, 1458 MatRelax_MPIAIJ, 1459 MatTranspose_MPIAIJ, 1460 MatGetInfo_MPIAIJ,0, 1461 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1462 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1463 0, 1464 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1465 0,0,0,0, 1466 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1467 0,0, 1468 0,0,MatConvertSameType_MPIAIJ,0,0, 1469 0,0,0, 1470 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1471 MatPrintHelp_MPIAIJ, 1472 MatScale_MPIAIJ,0,0,0, 1473 MatGetBlockSize_MPIAIJ,0,0,0,0, 
                               MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
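   Example usage (a minimal sketch, assuming a square system with roughly 5
   nonzeros per row in the diagonal block and 2 per row in the off-diagonal
   block):
$      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);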
1543 1544 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1545 the figure below we depict these three local rows and all columns (0-11). 1546 1547 $ 0 1 2 3 4 5 6 7 8 9 10 11 1548 $ ------------------- 1549 $ row 3 | o o o d d d o o o o o o 1550 $ row 4 | o o o d d d o o o o o o 1551 $ row 5 | o o o d d d o o o o o o 1552 $ ------------------- 1553 $ 1554 1555 Thus, any entries in the d locations are stored in the d (diagonal) 1556 submatrix, and any entries in the o locations are stored in the 1557 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1558 stored simply in the MATSEQAIJ format for compressed row storage. 1559 1560 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1561 and o_nz should indicate the number of nonzeros per row in the o matrix. 1562 In general, for PDE problems in which most nonzeros are near the diagonal, 1563 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1564 or you will get TERRIBLE performance; see the users' manual chapter on 1565 matrices. 1566 1567 .keywords: matrix, aij, compressed row, sparse, parallel 1568 1569 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1570 @*/ 1571 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1572 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1573 { 1574 Mat B; 1575 Mat_MPIAIJ *b; 1576 int ierr, i,sum[2],work[2],size; 1577 1578 *A = 0; 1579 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1580 PLogObjectCreate(B); 1581 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1582 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1583 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1584 MPI_Comm_size(comm,&size); 1585 if (size == 1) { 1586 B->ops.getrowij = MatGetRowIJ_MPIAIJ; 1587 B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 1588 B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 1589 B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 1590 B->ops.lufactor = MatLUFactor_MPIAIJ; 1591 B->ops.solve = MatSolve_MPIAIJ; 1592 B->ops.solveadd = MatSolveAdd_MPIAIJ; 1593 B->ops.solvetrans = MatSolveTrans_MPIAIJ; 1594 B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 1595 B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 1596 } 1597 B->destroy = MatDestroy_MPIAIJ; 1598 B->view = MatView_MPIAIJ; 1599 B->factor = 0; 1600 B->assembled = PETSC_FALSE; 1601 B->mapping = 0; 1602 1603 B->insertmode = NOT_SET_VALUES; 1604 MPI_Comm_rank(comm,&b->rank); 1605 MPI_Comm_size(comm,&b->size); 1606 1607 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1608 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1609 1610 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1611 work[0] = m; work[1] = n; 1612 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1613 if (M == PETSC_DECIDE) M = sum[0]; 1614 if (N == PETSC_DECIDE) N = sum[1]; 1615 } 1616 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1617 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1618 b->m = m; B->m = m; 1619 b->n = n; B->n = n; 1620 b->N = N; B->N = N; 1621 b->M = M; B->M = M; 1622 1623 /* build local table of row and column ownerships */ 1624 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1625 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1626 b->cowners = b->rowners + b->size + 2; 1627 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1628 b->rowners[0] = 0; 1629 for ( i=2; i<=b->size; i++ ) { 1630 b->rowners[i] += 
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}
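/*
   Usage sketch (illustrative): an assembled MPIAIJ matrix A can be copied,
   including its parallel layout and scatter context, via

      Mat Acopy;
      ierr = MatConvert(A,MATSAME,&Acopy); CHKERRQ(ierr);

   which for this matrix type is presumed to dispatch to
   MatConvertSameType_MPIAIJ() above, just as the routine itself uses
   MatConvert(...,MATSAME,...) to copy its sequential blocks.
*/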
#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }
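  /*
     At this point every processor holds the lengths of its own rows in
     ourlens[] and the global column indices of those rows in mycols[].
     Below, each row length is split into a diagonal-block count and an
     off-diagonal-block count (offlens[]) so that MatCreateMPIAIJ() can be
     called with exact per-row preallocation before the values are inserted.
  */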
  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
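/*
   Usage sketch (illustrative; the file name is an assumption and the binary
   viewer creation call should be checked against this PETSc version's
   viewer interface):

      Viewer viewer;
      Mat    A;
      ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer); CHKERRQ(ierr);

   MatLoad() with type MATMPIAIJ reaches MatLoad_MPIAIJ() above, which reads
   the header, row lengths, column indices, and values from the binary file
   on process 0 and distributes each piece to its owning process.
*/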