1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.279 1999/02/11 20:28:21 bsmith Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 #undef __FUNC__ 15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20 int n = B->n,i; 21 #if defined (USE_CTABLE) 22 int ierr; 23 #endif 24 25 PetscFunctionBegin; 26 #if defined (USE_CTABLE) 27 ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr); 28 for ( i=0; i<n; i++ ){ 29 ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr); 30 } 31 #else 32 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 33 PLogObjectMemory(mat,aij->N*sizeof(int)); 34 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 35 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 36 #endif 37 PetscFunctionReturn(0); 38 } 39 40 extern int DisAssemble_MPIAIJ(Mat); 41 42 #define CHUNKSIZE 15 43 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 44 { \ 45 \ 46 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 47 rmax = aimax[row]; nrow = ailen[row]; \ 48 col1 = col - shift; \ 49 \ 50 low = 0; high = nrow; \ 51 while (high-low > 5) { \ 52 t = (low+high)/2; \ 53 if (rp[t] > col) high = t; \ 54 else low = t; \ 55 } \ 56 for ( _i=0; _i<nrow; _i++ ) { \ 57 if (rp[_i] > col1) break; \ 58 if (rp[_i] == col1) { \ 59 if (addv == ADD_VALUES) ap[_i] += value; \ 60 else ap[_i] = value; \ 61 goto a_noinsert; \ 62 } \ 63 } \ 64 if (nonew == 1) goto a_noinsert; \ 65 else if (nonew == -1) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 66 if (nrow >= rmax) { \ 67 /* there is no extra room in row, therefore enlarge */ \ 68 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 69 Scalar *new_a; \ 70 \ 71 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 72 \ 73 /* malloc new storage space */ \ 74 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 75 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 76 new_j = (int *) (new_a + new_nz); \ 77 new_i = new_j + new_nz; \ 78 \ 79 /* copy over old data into new slots */ \ 80 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 81 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 82 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 83 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 84 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 85 len*sizeof(int)); \ 86 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 87 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 88 len*sizeof(Scalar)); \ 89 /* free up old matrix storage */ \ 90 \ 91 PetscFree(a->a); \ 92 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 93 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 94 a->singlemalloc = 1; \ 95 \ 96 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 97 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 98 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 99 a->maxnz += CHUNKSIZE; \ 100 a->reallocs++; \ 101 } \ 102 N = nrow++ - 1; a->nz++; \ 103 /* shift up all the later entries in this row */ \ 104 for ( ii=N; ii>=_i; ii-- ) { \ 105 rp[ii+1] = rp[ii]; \ 106 ap[ii+1] = ap[ii]; \ 107 } \ 108 rp[_i] = col1; \ 109 ap[_i] = value; \ 110 a_noinsert: ; \ 111 ailen[row] = nrow; \ 112 } 113 114 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 rp = bj + bi[row] + shift; ap = ba + bi[row] + 
shift; \ 118 rmax = bimax[row]; nrow = bilen[row]; \ 119 col1 = col - shift; \ 120 \ 121 low = 0; high = nrow; \ 122 while (high-low > 5) { \ 123 t = (low+high)/2; \ 124 if (rp[t] > col) high = t; \ 125 else low = t; \ 126 } \ 127 for ( _i=0; _i<nrow; _i++ ) { \ 128 if (rp[_i] > col1) break; \ 129 if (rp[_i] == col1) { \ 130 if (addv == ADD_VALUES) ap[_i] += value; \ 131 else ap[_i] = value; \ 132 goto b_noinsert; \ 133 } \ 134 } \ 135 if (nonew == 1) goto b_noinsert; \ 136 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 137 if (nrow >= rmax) { \ 138 /* there is no extra room in row, therefore enlarge */ \ 139 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 140 Scalar *new_a; \ 141 \ 142 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 143 \ 144 /* malloc new storage space */ \ 145 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 146 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 147 new_j = (int *) (new_a + new_nz); \ 148 new_i = new_j + new_nz; \ 149 \ 150 /* copy over old data into new slots */ \ 151 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 152 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 153 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 154 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 155 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 156 len*sizeof(int)); \ 157 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 158 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 159 len*sizeof(Scalar)); \ 160 /* free up old matrix storage */ \ 161 \ 162 PetscFree(b->a); \ 163 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 164 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 165 b->singlemalloc = 1; \ 166 \ 167 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 168 rmax = bimax[row] = bimax[row] + CHUNKSIZE; 
\ 169 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 170 b->maxnz += CHUNKSIZE; \ 171 b->reallocs++; \ 172 } \ 173 N = nrow++ - 1; b->nz++; \ 174 /* shift up all the later entries in this row */ \ 175 for ( ii=N; ii>=_i; ii-- ) { \ 176 rp[ii+1] = rp[ii]; \ 177 ap[ii+1] = ap[ii]; \ 178 } \ 179 rp[_i] = col1; \ 180 ap[_i] = value; \ 181 b_noinsert: ; \ 182 bilen[row] = nrow; \ 183 } 184 185 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 186 #undef __FUNC__ 187 #define __FUNC__ "MatSetValues_MPIAIJ" 188 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 191 Scalar value; 192 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 193 int cstart = aij->cstart, cend = aij->cend,row,col; 194 int roworiented = aij->roworiented; 195 196 /* Some Variables required in the macro */ 197 Mat A = aij->A; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 199 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 200 Scalar *aa = a->a; 201 202 Mat B = aij->B; 203 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 204 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 205 Scalar *ba = b->a; 206 207 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 208 int nonew = a->nonew,shift = a->indexshift; 209 Scalar *ap; 210 211 PetscFunctionBegin; 212 for ( i=0; i<m; i++ ) { 213 if (im[i] < 0) continue; 214 #if defined(USE_PETSC_BOPT_g) 215 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 216 #endif 217 if (im[i] >= rstart && im[i] < rend) { 218 row = im[i] - rstart; 219 for ( j=0; j<n; j++ ) { 220 if (in[j] >= cstart && in[j] < cend){ 221 col = in[j] - cstart; 222 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } 226 else if (in[j] < 0) continue; 227 #if defined(USE_PETSC_BOPT_g) 228 
else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 229 #endif 230 else { 231 if (mat->was_assembled) { 232 if (!aij->colmap) { 233 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 234 } 235 #if defined (USE_CTABLE) 236 col = TableFind( aij->colmap, in[j] + 1 ) - 1; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ *) B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !aij->donotstash) { 257 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!aij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNC__ 272 #define __FUNC__ "MatGetValues_MPIAIJ" 273 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 276 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 277 int cstart = aij->cstart, cend = aij->cend,row,col; 278 279 PetscFunctionBegin; 280 for ( i=0; i<m; i++ ) { 281 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 282 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 283 if (idxm[i] >= rstart && idxm[i] < rend) { 284 row = idxm[i] - rstart; 285 for ( j=0; j<n; j++ ) { 286 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative 
column"); 287 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 288 if (idxn[j] >= cstart && idxn[j] < cend){ 289 col = idxn[j] - cstart; 290 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } else { 292 if (!aij->colmap) { 293 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 294 } 295 #if defined (USE_CTABLE) 296 col = TableFind( aij->colmap, idxn[j] + 1 ) - 1; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 318 MPI_Comm comm = mat->comm; 319 int size = aij->size, *owners = aij->rowners; 320 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 321 MPI_Request *send_waits,*recv_waits; 322 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 323 InsertMode addv; 324 Scalar *rvalues,*svalues; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 aij->svalues = 0; aij->rvalues = 0; 329 aij->nsends = 0; aij->nrecvs = 0; 330 aij->send_waits = 0; aij->recv_waits = 0; 331 aij->rmax = 0; 332 PetscFunctionReturn(0); 333 } 334 335 /* make sure all processors are either in INSERTMODE or ADDMODE */ 336 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 337 if (addv == (ADD_VALUES|INSERT_VALUES)) { 338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 339 } 340 mat->insertmode = addv; /* in case this processor had no cache */ 341 342 /* first count number of contributors to each processor */ 343 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); 
CHKPTRQ(nprocs); 344 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 345 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 346 for ( i=0; i<aij->stash.n; i++ ) { 347 idx = aij->stash.idx[i]; 348 for ( j=0; j<size; j++ ) { 349 if (idx >= owners[j] && idx < owners[j+1]) { 350 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 351 } 352 } 353 } 354 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 355 356 /* inform other processors of number of messages and max length*/ 357 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 358 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 359 nreceives = work[rank]; 360 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 361 nmax = work[rank]; 362 PetscFree(work); 363 364 /* post receives: 365 1) each message will consist of ordered pairs 366 (global index,value) we store the global index as a double 367 to simplify the message passing. 368 2) since we don't know how long each individual message is we 369 allocate the largest needed buffer for each receive. Potentially 370 this is a lot of wasted space. 371 372 373 This could be done better. 
374 */ 375 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 376 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 377 for ( i=0; i<nreceives; i++ ) { 378 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 379 comm,recv_waits+i);CHKERRQ(ierr); 380 } 381 382 /* do sends: 383 1) starts[i] gives the starting index in svalues for stuff going to 384 the ith processor 385 */ 386 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 387 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 388 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 389 starts[0] = 0; 390 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 391 for ( i=0; i<aij->stash.n; i++ ) { 392 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 393 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 394 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 395 } 396 PetscFree(owner); 397 starts[0] = 0; 398 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 399 count = 0; 400 for ( i=0; i<size; i++ ) { 401 if (procs[i]) { 402 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 403 } 404 } 405 PetscFree(starts); 406 PetscFree(nprocs); 407 408 /* Free cache space */ 409 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 410 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 411 412 aij->svalues = svalues; aij->rvalues = rvalues; 413 aij->nsends = nsends; aij->nrecvs = nreceives; 414 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 415 aij->rmax = nmax; 416 417 PetscFunctionReturn(0); 418 } 419 extern int MatSetUpMultiply_MPIAIJ(Mat); 420 421 #undef __FUNC__ 422 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 423 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType 
mode) 424 { 425 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 426 MPI_Status *send_status,recv_status; 427 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 428 int row,col,other_disassembled; 429 Scalar *values,val; 430 InsertMode addv = mat->insertmode; 431 432 PetscFunctionBegin; 433 434 /* wait on receives */ 435 while (count) { 436 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 437 /* unpack receives into our local space */ 438 values = aij->rvalues + 3*imdex*aij->rmax; 439 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 440 n = n/3; 441 for ( i=0; i<n; i++ ) { 442 row = (int) PetscReal(values[3*i]) - aij->rstart; 443 col = (int) PetscReal(values[3*i+1]); 444 val = values[3*i+2]; 445 if (col >= aij->cstart && col < aij->cend) { 446 col -= aij->cstart; 447 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 448 } else { 449 if (mat->was_assembled || mat->assembled) { 450 if (!aij->colmap) { 451 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 452 } 453 #if defined (USE_CTABLE) 454 col = TableFind( aij->colmap, col + 1 ) - 1; 455 #else 456 col = aij->colmap[col] - 1; 457 #endif 458 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 col = (int) PetscReal(values[3*i+1]); 461 } 462 } 463 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 464 } 465 } 466 count--; 467 } 468 if (aij->recv_waits) PetscFree(aij->recv_waits); 469 if (aij->rvalues) PetscFree(aij->rvalues); 470 471 /* wait on sends */ 472 if (aij->nsends) { 473 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 474 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 475 PetscFree(send_status); 476 } 477 if (aij->send_waits) PetscFree(aij->send_waits); 478 if (aij->svalues) PetscFree(aij->svalues); 479 480 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 481 ierr = 
MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 482 483 /* determine if any processor has disassembled, if so we must 484 also disassemble ourselfs, in order that we may reassemble. */ 485 /* 486 if nonzero structure of submatrix B cannot change then we know that 487 no processor disassembled thus we can skip this stuff 488 */ 489 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 490 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 491 if (mat->was_assembled && !other_disassembled) { 492 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 493 } 494 } 495 496 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 497 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 498 } 499 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 500 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 501 502 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNC__ 507 #define __FUNC__ "MatZeroEntries_MPIAIJ" 508 int MatZeroEntries_MPIAIJ(Mat A) 509 { 510 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 511 int ierr; 512 513 PetscFunctionBegin; 514 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 515 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNC__ 520 #define __FUNC__ "MatZeroRows_MPIAIJ" 521 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 522 { 523 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 524 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 525 int *procs,*nprocs,j,found,idx,nsends,*work,row; 526 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 527 int *rvalues,tag = A->tag,count,base,slen,n,*source; 528 int *lens,imdex,*lrows,*values,rstart=l->rstart; 529 MPI_Comm comm = A->comm; 530 MPI_Request *send_waits,*recv_waits; 531 MPI_Status recv_status,*send_status; 532 IS istmp; 533 534 PetscFunctionBegin; 535 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 536 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 537 538 /* first count 
number of contributors to each processor */ 539 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 540 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 541 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 542 for ( i=0; i<N; i++ ) { 543 idx = rows[i]; 544 found = 0; 545 for ( j=0; j<size; j++ ) { 546 if (idx >= owners[j] && idx < owners[j+1]) { 547 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 548 } 549 } 550 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 551 } 552 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 553 554 /* inform other processors of number of messages and max length*/ 555 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 556 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 557 nrecvs = work[rank]; 558 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 559 nmax = work[rank]; 560 PetscFree(work); 561 562 /* post receives: */ 563 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 564 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 565 for ( i=0; i<nrecvs; i++ ) { 566 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 567 } 568 569 /* do sends: 570 1) starts[i] gives the starting index in svalues for stuff going to 571 the ith processor 572 */ 573 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 574 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 575 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 576 starts[0] = 0; 577 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 578 for ( i=0; i<N; i++ ) { 579 svalues[starts[owner[i]]++] = rows[i]; 580 } 581 ISRestoreIndices(is,&rows); 582 583 starts[0] = 0; 584 for ( i=1; i<size+1; i++ ) { starts[i] = 
starts[i-1] + nprocs[i-1];} 585 count = 0; 586 for ( i=0; i<size; i++ ) { 587 if (procs[i]) { 588 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 589 } 590 } 591 PetscFree(starts); 592 593 base = owners[rank]; 594 595 /* wait on receives */ 596 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 597 source = lens + nrecvs; 598 count = nrecvs; slen = 0; 599 while (count) { 600 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 601 /* unpack receives into our local space */ 602 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 603 source[imdex] = recv_status.MPI_SOURCE; 604 lens[imdex] = n; 605 slen += n; 606 count--; 607 } 608 PetscFree(recv_waits); 609 610 /* move the data into the send scatter */ 611 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 612 count = 0; 613 for ( i=0; i<nrecvs; i++ ) { 614 values = rvalues + i*nmax; 615 for ( j=0; j<lens[i]; j++ ) { 616 lrows[count++] = values[j] - base; 617 } 618 } 619 PetscFree(rvalues); PetscFree(lens); 620 PetscFree(owner); PetscFree(nprocs); 621 622 /* actually zap the local rows */ 623 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 624 PLogObjectParent(A,istmp); 625 626 /* 627 Zero the required rows. If the "diagonal block" of the matrix 628 is square and the user wishes to set the diagonal we use seperate 629 code so that MatSetValues() is not called for each diagonal allocating 630 new memory, thus calling lots of mallocs and slowing things down. 
631 632 Contributed by: Mathew Knepley 633 */ 634 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 635 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 636 if (diag && (l->A->M == l->A->N)) { 637 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 638 } else if (diag) { 639 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 640 for ( i = 0; i < slen; i++ ) { 641 row = lrows[i] + rstart; 642 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 643 } 644 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 645 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 646 } else { 647 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 648 } 649 ierr = ISDestroy(istmp); CHKERRQ(ierr); 650 PetscFree(lrows); 651 652 /* wait on sends */ 653 if (nsends) { 654 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 655 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 656 PetscFree(send_status); 657 } 658 PetscFree(send_waits); PetscFree(svalues); 659 660 PetscFunctionReturn(0); 661 } 662 663 #undef __FUNC__ 664 #define __FUNC__ "MatMult_MPIAIJ" 665 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 666 { 667 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 668 int ierr,nt; 669 670 PetscFunctionBegin; 671 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 672 if (nt != a->n) { 673 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 674 } 675 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 676 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 677 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 678 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNC__ 683 #define __FUNC__ "MatMultAdd_MPIAIJ" 684 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 687 int ierr; 688 689 
PetscFunctionBegin; 690 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 691 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 692 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 693 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNC__ 698 #define __FUNC__ "MatMultTrans_MPIAIJ" 699 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 700 { 701 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 702 int ierr; 703 704 PetscFunctionBegin; 705 /* do nondiagonal part */ 706 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 707 /* send it on its way */ 708 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 709 /* do local part */ 710 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 711 /* receive remote parts: note this assumes the values are not actually */ 712 /* inserted in yy until the next line, which is true for my implementation*/ 713 /* but is not perhaps always true. */ 714 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNC__ 719 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 720 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 721 { 722 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 723 int ierr; 724 725 PetscFunctionBegin; 726 /* do nondiagonal part */ 727 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 728 /* send it on its way */ 729 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 730 /* do local part */ 731 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 732 /* receive remote parts: note this assumes the values are not actually */ 733 /* inserted in yy until the next line, which is true for my implementation*/ 734 /* but is not perhaps always true. 
*/ 735 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 /* 740 This only works correctly for square matrices where the subblock A->A is the 741 diagonal block 742 */ 743 #undef __FUNC__ 744 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 745 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 746 { 747 int ierr; 748 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 749 750 PetscFunctionBegin; 751 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 752 if (a->rstart != a->cstart || a->rend != a->cend) { 753 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 754 } 755 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNC__ 760 #define __FUNC__ "MatScale_MPIAIJ" 761 int MatScale_MPIAIJ(Scalar *aa,Mat A) 762 { 763 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 764 int ierr; 765 766 PetscFunctionBegin; 767 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 768 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNC__ 773 #define __FUNC__ "MatDestroy_MPIAIJ" 774 int MatDestroy_MPIAIJ(Mat mat) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 777 int ierr; 778 779 PetscFunctionBegin; 780 if (--mat->refct > 0) PetscFunctionReturn(0); 781 782 if (mat->mapping) { 783 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 784 } 785 if (mat->bmapping) { 786 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 787 } 788 if (mat->rmap) { 789 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 790 } 791 if (mat->cmap) { 792 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 793 } 794 #if defined(USE_PETSC_LOG) 795 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 796 #endif 797 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 798 PetscFree(aij->rowners); 799 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 800 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 801 #if 
defined (USE_CTABLE) 802 if (aij->colmap) TableDelete(aij->colmap); 803 #else 804 if (aij->colmap) PetscFree(aij->colmap); 805 #endif 806 if (aij->garray) PetscFree(aij->garray); 807 if (aij->lvec) VecDestroy(aij->lvec); 808 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 809 if (aij->rowvalues) PetscFree(aij->rowvalues); 810 PetscFree(aij); 811 PLogObjectDestroy(mat); 812 PetscHeaderDestroy(mat); 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNC__ 817 #define __FUNC__ "MatView_MPIAIJ_Binary" 818 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 819 { 820 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 821 int ierr; 822 823 PetscFunctionBegin; 824 if (aij->size == 1) { 825 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 826 } 827 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNC__ 832 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 833 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 834 { 835 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 836 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 837 int ierr, format,shift = C->indexshift,rank = aij->rank ; 838 int size = aij->size; 839 FILE *fd; 840 ViewerType vtype; 841 842 PetscFunctionBegin; 843 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 844 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 845 ierr = ViewerGetFormat(viewer,&format); 846 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 847 MatInfo info; 848 int flg; 849 MPI_Comm_rank(mat->comm,&rank); 850 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 851 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 852 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 853 PetscSequentialPhaseBegin(mat->comm,1); 854 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 855 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 856 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, 
using I-node routines\n", 857 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 858 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 859 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 860 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 861 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 862 fflush(fd); 863 PetscSequentialPhaseEnd(mat->comm,1); 864 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 867 PetscFunctionReturn(0); 868 } 869 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 870 Draw draw; 871 PetscTruth isnull; 872 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 873 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 874 } 875 876 if (size == 1) { 877 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 878 } else { 879 /* assemble the entire matrix onto first processor. */ 880 Mat A; 881 Mat_SeqAIJ *Aloc; 882 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 883 Scalar *a; 884 885 if (!rank) { 886 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 887 } else { 888 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 889 } 890 PLogObjectParent(mat,A); 891 892 /* copy over the A part */ 893 Aloc = (Mat_SeqAIJ*) aij->A->data; 894 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 895 row = aij->rstart; 896 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 897 for ( i=0; i<m; i++ ) { 898 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 899 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 900 } 901 aj = Aloc->j; 902 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 903 904 /* copy over the B part */ 905 Aloc = (Mat_SeqAIJ*) aij->B->data; 906 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 907 row = aij->rstart; 908 ct = cols = (int *) PetscMalloc( 
(ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 909 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 910 for ( i=0; i<m; i++ ) { 911 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 912 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 913 } 914 PetscFree(ct); 915 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 916 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 917 /* 918 Everyone has to call to draw the matrix since the graphics waits are 919 synchronized across all processors that share the Draw object 920 */ 921 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 922 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 923 } 924 ierr = MatDestroy(A); CHKERRQ(ierr); 925 } 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNC__ 930 #define __FUNC__ "MatView_MPIAIJ" 931 int MatView_MPIAIJ(Mat mat,Viewer viewer) 932 { 933 int ierr; 934 ViewerType vtype; 935 936 PetscFunctionBegin; 937 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 938 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 939 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 940 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 941 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 942 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 943 } else { 944 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 945 } 946 PetscFunctionReturn(0); 947 } 948 949 /* 950 This has to provide several versions. 951 952 2) a) use only local smoothing updating outer values only once. 953 b) local smoothing updating outer values each inner iteration 954 3) color updating out values betwen colors. 
955 */ 956 #undef __FUNC__ 957 #define __FUNC__ "MatRelax_MPIAIJ" 958 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 959 double fshift,int its,Vec xx) 960 { 961 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 962 Mat AA = mat->A, BB = mat->B; 963 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 964 Scalar *b,*x,*xs,*ls,d,*v,sum; 965 int ierr,*idx, *diag; 966 int n = mat->n, m = mat->m, i,shift = A->indexshift; 967 968 PetscFunctionBegin; 969 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 970 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 971 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 972 xs = x + shift; /* shift by one for index start of 1 */ 973 ls = ls + shift; 974 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 975 diag = A->diag; 976 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 977 if (flag & SOR_ZERO_INITIAL_GUESS) { 978 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 979 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 980 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 981 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 985 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 986 while (its--) { 987 /* go down through the rows */ 988 for ( i=0; i<m; i++ ) { 989 n = A->i[i+1] - A->i[i]; 990 PLogFlops(4*n+3); 991 idx = A->j + A->i[i] + shift; 992 v = A->a + A->i[i] + shift; 993 sum = b[i]; 994 SPARSEDENSEMDOT(sum,xs,v,idx,n); 995 d = fshift + A->a[diag[i]+shift]; 996 n = B->i[i+1] - B->i[i]; 997 idx = B->j + B->i[i] + shift; 998 v = B->a + B->i[i] + shift; 999 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1000 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1001 } 1002 /* come up through the rows */ 1003 for ( i=m-1; i>-1; i-- ) { 1004 n = A->i[i+1] - A->i[i]; 1005 PLogFlops(4*n+3) 1006 idx = A->j + A->i[i] + shift; 1007 v = A->a + A->i[i] + shift; 1008 sum = b[i]; 1009 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1010 d = fshift + A->a[diag[i]+shift]; 1011 n = B->i[i+1] - B->i[i]; 1012 idx = B->j + B->i[i] + shift; 1013 v = B->a + B->i[i] + shift; 1014 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1015 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1016 } 1017 } 1018 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1019 if (flag & SOR_ZERO_INITIAL_GUESS) { 1020 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1021 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1022 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1023 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1028 while (its--) { 1029 for ( i=0; i<m; i++ ) { 1030 n = A->i[i+1] - A->i[i]; 1031 PLogFlops(4*n+3); 1032 idx = A->j + A->i[i] + shift; 1033 v = A->a + A->i[i] + shift; 1034 sum = b[i]; 1035 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1036 d = fshift + A->a[diag[i]+shift]; 1037 n = B->i[i+1] - B->i[i]; 1038 idx = B->j + B->i[i] + shift; 1039 v = B->a + B->i[i] + shift; 1040 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1041 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1042 } 1043 } 1044 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1045 if (flag & SOR_ZERO_INITIAL_GUESS) { 1046 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1048 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1049 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 while (its--) { 1055 for ( i=m-1; i>-1; i-- ) { 1056 n = A->i[i+1] - A->i[i]; 1057 PLogFlops(4*n+3); 1058 idx = A->j + A->i[i] + shift; 1059 v = A->a + A->i[i] + shift; 1060 sum = b[i]; 1061 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1062 d = fshift + A->a[diag[i]+shift]; 1063 n = B->i[i+1] - B->i[i]; 1064 idx = B->j + B->i[i] + shift; 1065 v = B->a + B->i[i] + shift; 1066 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1067 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1068 } 1069 } 1070 } else { 1071 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1072 } 1073 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1074 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1075 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNC__ 1080 #define __FUNC__ "MatGetInfo_MPIAIJ" 1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1084 Mat A = mat->A, B = mat->B; 1085 int ierr; 1086 double isend[5], irecv[5]; 1087 1088 PetscFunctionBegin; 1089 info->block_size = 1.0; 1090 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1091 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1092 isend[3] = info->memory; isend[4] = info->mallocs; 1093 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1094 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1095 isend[3] += info->memory; isend[4] += info->mallocs; 1096 if (flag == MAT_LOCAL) { 1097 info->nz_used = isend[0]; 1098 info->nz_allocated = isend[1]; 1099 info->nz_unneeded = isend[2]; 1100 info->memory = isend[3]; 1101 info->mallocs = isend[4]; 1102 } else if (flag == MAT_GLOBAL_MAX) { 1103 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1104 info->nz_used = irecv[0]; 1105 info->nz_allocated = irecv[1]; 1106 info->nz_unneeded = irecv[2]; 1107 info->memory = irecv[3]; 1108 info->mallocs = irecv[4]; 1109 } else if (flag == MAT_GLOBAL_SUM) { 1110 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1111 info->nz_used = irecv[0]; 1112 info->nz_allocated = irecv[1]; 1113 info->nz_unneeded = irecv[2]; 1114 info->memory = irecv[3]; 1115 info->mallocs = irecv[4]; 1116 } 1117 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1118 info->fill_ratio_needed = 0; 1119 
info->factor_mallocs = 0; 1120 info->rows_global = (double)mat->M; 1121 info->columns_global = (double)mat->N; 1122 info->rows_local = (double)mat->m; 1123 info->columns_local = (double)mat->N; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNC__ 1129 #define __FUNC__ "MatSetOption_MPIAIJ" 1130 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1133 int ierr; 1134 1135 PetscFunctionBegin; 1136 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1137 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1138 op == MAT_COLUMNS_UNSORTED || 1139 op == MAT_COLUMNS_SORTED || 1140 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1141 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1142 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1143 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1144 } else if (op == MAT_ROW_ORIENTED) { 1145 a->roworiented = 1; 1146 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1147 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1148 } else if (op == MAT_ROWS_SORTED || 1149 op == MAT_ROWS_UNSORTED || 1150 op == MAT_SYMMETRIC || 1151 op == MAT_STRUCTURALLY_SYMMETRIC || 1152 op == MAT_YES_NEW_DIAGONALS) 1153 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1154 else if (op == MAT_COLUMN_ORIENTED) { 1155 a->roworiented = 0; 1156 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1157 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1158 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1159 a->donotstash = 1; 1160 } else if (op == MAT_NO_NEW_DIAGONALS){ 1161 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1162 } else { 1163 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1164 } 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNC__ 1169 #define __FUNC__ "MatGetSize_MPIAIJ" 1170 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1171 { 1172 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1173 1174 PetscFunctionBegin; 1175 if (m) *m = mat->M; 1176 if (n) *n = mat->N; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNC__ 1181 #define __FUNC__ 
"MatGetLocalSize_MPIAIJ" 1182 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1183 { 1184 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1185 1186 PetscFunctionBegin; 1187 if (m) *m = mat->m; 1188 if (n) *n = mat->n; 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNC__ 1193 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1194 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1195 { 1196 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1197 1198 PetscFunctionBegin; 1199 *m = mat->rstart; *n = mat->rend; 1200 PetscFunctionReturn(0); 1201 } 1202 1203 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1204 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1205 1206 #undef __FUNC__ 1207 #define __FUNC__ "MatGetRow_MPIAIJ" 1208 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1209 { 1210 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1211 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1212 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1213 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1214 int *cmap, *idx_p; 1215 1216 PetscFunctionBegin; 1217 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1218 mat->getrowactive = PETSC_TRUE; 1219 1220 if (!mat->rowvalues && (idx || v)) { 1221 /* 1222 allocate enough space to hold information from the longest row. 
1223 */ 1224 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1225 int max = 1,m = mat->m,tmp; 1226 for ( i=0; i<m; i++ ) { 1227 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1228 if (max < tmp) { max = tmp; } 1229 } 1230 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1231 mat->rowindices = (int *) (mat->rowvalues + max); 1232 } 1233 1234 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1235 lrow = row - rstart; 1236 1237 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1238 if (!v) {pvA = 0; pvB = 0;} 1239 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1240 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1241 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1242 nztot = nzA + nzB; 1243 1244 cmap = mat->garray; 1245 if (v || idx) { 1246 if (nztot) { 1247 /* Sort by increasing column numbers, assuming A and B already sorted */ 1248 int imark = -1; 1249 if (v) { 1250 *v = v_p = mat->rowvalues; 1251 for ( i=0; i<nzB; i++ ) { 1252 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1253 else break; 1254 } 1255 imark = i; 1256 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1257 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1258 } 1259 if (idx) { 1260 *idx = idx_p = mat->rowindices; 1261 if (imark > -1) { 1262 for ( i=0; i<imark; i++ ) { 1263 idx_p[i] = cmap[cworkB[i]]; 1264 } 1265 } else { 1266 for ( i=0; i<nzB; i++ ) { 1267 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1268 else break; 1269 } 1270 imark = i; 1271 } 1272 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1273 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1274 } 1275 } else { 1276 if (idx) *idx = 0; 1277 if (v) *v = 0; 1278 } 1279 } 1280 *nz = nztot; 1281 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1282 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNC__ 1287 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1288 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1289 { 1290 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1291 1292 PetscFunctionBegin; 1293 if (aij->getrowactive == PETSC_FALSE) { 1294 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1295 } 1296 aij->getrowactive = PETSC_FALSE; 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNC__ 1301 #define __FUNC__ "MatNorm_MPIAIJ" 1302 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1303 { 1304 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1305 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1306 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1307 double sum = 0.0; 1308 Scalar *v; 1309 1310 PetscFunctionBegin; 1311 if (aij->size == 1) { 1312 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1313 } else { 1314 if (type == NORM_FROBENIUS) { 1315 v = amat->a; 1316 for (i=0; i<amat->nz; i++ ) { 1317 #if defined(USE_PETSC_COMPLEX) 1318 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1319 #else 1320 sum += (*v)*(*v); v++; 1321 #endif 1322 } 1323 v = bmat->a; 1324 for (i=0; i<bmat->nz; i++ ) { 1325 #if defined(USE_PETSC_COMPLEX) 1326 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1327 #else 1328 sum += (*v)*(*v); v++; 1329 #endif 1330 } 1331 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1332 *norm = sqrt(*norm); 1333 } else if (type == NORM_1) { /* max column norm */ 1334 double *tmp, *tmp2; 1335 int *jj, *garray = aij->garray; 1336 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1337 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1338 PetscMemzero(tmp,aij->N*sizeof(double)); 1339 *norm = 0.0; 1340 v = amat->a; jj = amat->j; 1341 for ( j=0; j<amat->nz; j++ ) { 1342 tmp[cstart + *jj++ + 
shift] += PetscAbsScalar(*v); v++; 1343 } 1344 v = bmat->a; jj = bmat->j; 1345 for ( j=0; j<bmat->nz; j++ ) { 1346 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1347 } 1348 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1349 for ( j=0; j<aij->N; j++ ) { 1350 if (tmp2[j] > *norm) *norm = tmp2[j]; 1351 } 1352 PetscFree(tmp); PetscFree(tmp2); 1353 } else if (type == NORM_INFINITY) { /* max row norm */ 1354 double ntemp = 0.0; 1355 for ( j=0; j<amat->m; j++ ) { 1356 v = amat->a + amat->i[j] + shift; 1357 sum = 0.0; 1358 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1359 sum += PetscAbsScalar(*v); v++; 1360 } 1361 v = bmat->a + bmat->i[j] + shift; 1362 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1363 sum += PetscAbsScalar(*v); v++; 1364 } 1365 if (sum > ntemp) ntemp = sum; 1366 } 1367 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1368 } else { 1369 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1370 } 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNC__ 1376 #define __FUNC__ "MatTranspose_MPIAIJ" 1377 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1378 { 1379 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1380 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1381 int ierr,shift = Aloc->indexshift; 1382 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1383 Mat B; 1384 Scalar *array; 1385 1386 PetscFunctionBegin; 1387 if (matout == PETSC_NULL && M != N) { 1388 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1389 } 1390 1391 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1392 1393 /* copy over the A part */ 1394 Aloc = (Mat_SeqAIJ*) a->A->data; 1395 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1396 row = a->rstart; 1397 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1398 for ( i=0; i<m; i++ ) { 1399 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1400 row++; array += 
ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1401 } 1402 aj = Aloc->j; 1403 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1404 1405 /* copy over the B part */ 1406 Aloc = (Mat_SeqAIJ*) a->B->data; 1407 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1408 row = a->rstart; 1409 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1410 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1411 for ( i=0; i<m; i++ ) { 1412 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1413 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1414 } 1415 PetscFree(ct); 1416 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1417 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1418 if (matout != PETSC_NULL) { 1419 *matout = B; 1420 } else { 1421 PetscOps *Abops; 1422 struct _MatOps *Aops; 1423 1424 /* This isn't really an in-place transpose .... but free data structures from a */ 1425 PetscFree(a->rowners); 1426 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1427 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1428 #if defined (USE_CTABLE) 1429 if (a->colmap) TableDelete(a->colmap); 1430 #else 1431 if (a->colmap) PetscFree(a->colmap); 1432 #endif 1433 if (a->garray) PetscFree(a->garray); 1434 if (a->lvec) VecDestroy(a->lvec); 1435 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1436 PetscFree(a); 1437 1438 /* 1439 This is horrible, horrible code. We need to keep the 1440 A pointers for the bops and ops but copy everything 1441 else from C. 
1442 */ 1443 Abops = A->bops; 1444 Aops = A->ops; 1445 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1446 A->bops = Abops; 1447 A->ops = Aops; 1448 PetscHeaderDestroy(B); 1449 } 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #undef __FUNC__ 1454 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1455 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1456 { 1457 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1458 Mat a = aij->A, b = aij->B; 1459 int ierr,s1,s2,s3; 1460 1461 PetscFunctionBegin; 1462 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1463 if (rr) { 1464 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1465 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1466 /* Overlap communication with computation. */ 1467 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1468 } 1469 if (ll) { 1470 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1471 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1472 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1473 } 1474 /* scale the diagonal block */ 1475 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1476 1477 if (rr) { 1478 /* Do a scatter end and then right scale the off-diagonal block */ 1479 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1480 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1481 } 1482 1483 PetscFunctionReturn(0); 1484 } 1485 1486 1487 extern int MatPrintHelp_SeqAIJ(Mat); 1488 #undef __FUNC__ 1489 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1490 int MatPrintHelp_MPIAIJ(Mat A) 1491 { 1492 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1493 int ierr; 1494 1495 PetscFunctionBegin; 1496 if (!a->rank) { 1497 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNC__ 1503 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1504 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1505 { 1506 PetscFunctionBegin; 1507 *bs 
= 1; 1508 PetscFunctionReturn(0); 1509 } 1510 #undef __FUNC__ 1511 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1512 int MatSetUnfactored_MPIAIJ(Mat A) 1513 { 1514 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1515 int ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNC__ 1523 #define __FUNC__ "MatEqual_MPIAIJ" 1524 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1525 { 1526 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1527 Mat a, b, c, d; 1528 PetscTruth flg; 1529 int ierr; 1530 1531 PetscFunctionBegin; 1532 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1533 a = matA->A; b = matA->B; 1534 c = matB->A; d = matB->B; 1535 1536 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1537 if (flg == PETSC_TRUE) { 1538 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1539 } 1540 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNC__ 1545 #define __FUNC__ "MatCopy_MPIAIJ" 1546 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1547 { 1548 int ierr; 1549 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1550 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1551 1552 PetscFunctionBegin; 1553 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1554 /* because of the column compression in the off-processor part of the matrix a->B, 1555 the number of columns in a->B and b->B may be different, hence we cannot call 1556 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1557 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1558 then copying the submatrices */ 1559 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1562 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1568 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1569 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1570 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1571 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1572 1573 /* -------------------------------------------------------------------*/ 1574 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1575 MatGetRow_MPIAIJ, 1576 MatRestoreRow_MPIAIJ, 1577 MatMult_MPIAIJ, 1578 MatMultAdd_MPIAIJ, 1579 MatMultTrans_MPIAIJ, 1580 MatMultTransAdd_MPIAIJ, 1581 0, 1582 0, 1583 0, 1584 0, 1585 0, 1586 0, 1587 MatRelax_MPIAIJ, 1588 MatTranspose_MPIAIJ, 1589 MatGetInfo_MPIAIJ, 1590 MatEqual_MPIAIJ, 1591 MatGetDiagonal_MPIAIJ, 1592 MatDiagonalScale_MPIAIJ, 1593 MatNorm_MPIAIJ, 1594 MatAssemblyBegin_MPIAIJ, 1595 MatAssemblyEnd_MPIAIJ, 1596 0, 1597 MatSetOption_MPIAIJ, 1598 MatZeroEntries_MPIAIJ, 1599 MatZeroRows_MPIAIJ, 1600 0, 1601 0, 1602 0, 1603 0, 1604 MatGetSize_MPIAIJ, 1605 MatGetLocalSize_MPIAIJ, 1606 MatGetOwnershipRange_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 0, 1611 MatDuplicate_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 0, 1616 0, 1617 MatGetSubMatrices_MPIAIJ, 1618 MatIncreaseOverlap_MPIAIJ, 1619 MatGetValues_MPIAIJ, 1620 MatCopy_MPIAIJ, 1621 MatPrintHelp_MPIAIJ, 1622 MatScale_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 MatGetBlockSize_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 MatFDColoringCreate_MPIAIJ, 1632 0, 1633 MatSetUnfactored_MPIAIJ, 1634 0, 1635 0, 1636 MatGetSubMatrix_MPIAIJ, 1637 0, 1638 0, 1639 
MatGetMaps_Petsc}; 1640 1641 /* ----------------------------------------------------------------------------------------*/ 1642 1643 EXTERN_C_BEGIN 1644 #undef __FUNC__ 1645 #define __FUNC__ "MatStoreValues_MPIAIJ" 1646 int MatStoreValues_MPIAIJ(Mat mat) 1647 { 1648 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1649 int ierr; 1650 1651 PetscFunctionBegin; 1652 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1653 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 EXTERN_C_END 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNC__ 1660 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1661 int MatRetrieveValues_MPIAIJ(Mat mat) 1662 { 1663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1664 int ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1668 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 EXTERN_C_END 1672 1673 #include "pc.h" 1674 EXTERN_C_BEGIN 1675 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1676 EXTERN_C_END 1677 1678 #undef __FUNC__ 1679 #define __FUNC__ "MatCreateMPIAIJ" 1680 /*@C 1681 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1682 (the default parallel PETSc format). For good matrix assembly performance 1683 the user should preallocate the matrix storage by setting the parameters 1684 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1685 performance can be increased by more than a factor of 50. 1686 1687 Collective on MPI_Comm 1688 1689 Input Parameters: 1690 + comm - MPI communicator 1691 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1692 This value should be the same as the local size used in creating the 1693 y vector for the matrix-vector product y = Ax. 1694 . n - This value should be the same as the local size used in creating the 1695 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 1696 calculated if N is given) For square matrices n is almost always m. 1697 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1698 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1699 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1700 (same value is used for all local rows) 1701 . d_nnz - array containing the number of nonzeros in the various rows of the 1702 DIAGONAL portion of the local submatrix (possibly different for each row) 1703 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1704 The size of this array is equal to the number of local rows, i.e 'm'. 1705 You must leave room for the diagonal entry even if it is zero. 1706 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1707 submatrix (same value is used for all local rows). 1708 - o_nnz - array containing the number of nonzeros in the various rows of the 1709 OFF-DIAGONAL portion of the local submatrix (possibly different for 1710 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1711 structure. The size of this array is equal to the number 1712 of local rows, i.e 'm'. 1713 1714 Output Parameter: 1715 . A - the matrix 1716 1717 Notes: 1718 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1719 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1720 storage requirements for this matrix. 1721 1722 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1723 processor than it must be used on all processors that share the object for 1724 that argument. 1725 1726 The AIJ format (also called the Yale sparse matrix format or 1727 compressed row storage), is fully compatible with standard Fortran 77 1728 storage. That is, the stored row and column indices can begin at 1729 either one (as in Fortran) or zero. See the users manual for details. 
1730 1731 The user MUST specify either the local or global matrix dimensions 1732 (possibly both). 1733 1734 The parallel matrix is partitioned such that the first m0 rows belong to 1735 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1736 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1737 1738 The DIAGONAL portion of the local submatrix of a processor can be defined 1739 as the submatrix which is obtained by extraction the part corresponding 1740 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1741 first row that belongs to the processor, and r2 is the last row belonging 1742 to the this processor. This is a square mxm matrix. The remaining portion 1743 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1744 1745 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1746 1747 By default, this format uses inodes (identical nodes) when possible. 1748 We search for consecutive rows with the same nonzero structure, thereby 1749 reusing matrix information to achieve increased efficiency. 1750 1751 Options Database Keys: 1752 + -mat_aij_no_inode - Do not use inodes 1753 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1754 - -mat_aij_oneindex - Internally use indexing starting at 1 1755 rather than 0. Note that when calling MatSetValues(), 1756 the user still MUST index entries starting at 0! 1757 . -mat_mpi - use the parallel matrix data structures even on one processor 1758 (defaults to using SeqBAIJ format on one processor) 1759 . -mat_mpi - use the parallel matrix data structures even on one processor 1760 (defaults to using SeqAIJ format on one processor) 1761 1762 1763 Example usage: 1764 1765 Consider the following 8x8 matrix with 34 non-zero values, that is 1766 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1767 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  /* On a single process fall back to the sequential AIJ format, unless the
     user explicitly asked for the parallel data structures via the
     -mat_mpiaij or -mat_mpi options */
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  /* per-row preallocation arrays only make sense once the local row count is known */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  /* fill in any PETSC_DECIDE/PETSC_DETERMINE sizes from the ones that were given */
  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners and cowners share
     one allocation (each gets size+2 slots) */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every process' local row count, then prefix-sum so that
     rowners[i]..rowners[i+1] is the global row range owned by process i */
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for the column ownership ranges */
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* A is the m x n DIAGONAL block; B is the OFF-DIAGONAL block, created here
     with the full global column count N (garray later records the global
     index of each of its columns -- see CreateColmap_MPIAIJ_Private) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register string-named type-specific operations on the object so other
     code can look them up by name later */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - duplicates an MPIAIJ matrix; the numerical values are
   copied only as requested by cpvalues, while the parallel layout, ownership
   tables, colmap/garray and the communication context are always replicated.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  /* copy over the sizes and the ownership ranges */
  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners/cowners share one allocation, as in MatCreateMPIAIJ() */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  /* one memcpy covers both rowners and cowners since they share the allocation */
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  /* copy the global-to-local column map of the off-diagonal part, if present */
  if (oldmat->colmap) {
#if defined (USE_CTABLE)
    ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
#endif
  } else a->colmap = 0;
  /* copy the local-to-global column map of the off-diagonal part, if present;
     its length is the column count of the off-diagonal block B */
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  /* replicate the work vector and scatter context used by the matrix-vector
     product, and duplicate the two sequential blocks (values copied only when
     cpvalues requests it) */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - loads a matrix stored in PETSc binary format into an
   MPIAIJ matrix.  Process 0 reads the file (header, then per-row lengths,
   then column indices, then numerical values) and ships each other process
   its share; rows are divided as evenly as possible across the processes.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only process 0 touches the file; everything else arrives via MPI */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows; the first M%size processes get one extra row */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix-sum so rowners[i]..rowners[i+1] is the row range of process i */
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation
     with ourlens (second half) */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity-check that the message carried exactly the expected count */
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily reduce ourlens to the diagonal-only counts
     for preallocation, then restore the full row lengths afterwards */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix, one local row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr =
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    csize - number of columns of the result owned by this process
            (PETSC_DECIDE lets this routine pick an even split)
    call  - MAT_INITIAL_MATRIX to create the result; MAT_REUSE_MATRIX to
            refill a matrix previously produced by this routine (the
            sequential submatrix is cached on it under the name "SubMatrix")
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call ==  MAT_REUSE_MATRIX) {
    /* retrieve the cached sequential submatrix and refill it in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* keep the matrix; free only the array that held it */
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* the scan gives each process the end of its column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; dlens and olens share one allocation */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* copy the rows of the sequential submatrix into the parallel matrix,
     one row at a time, straight out of its compressed-row arrays */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* the compose took a reference; drop ours so M now owns Mreuse */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}