1 /*$Id: mpiaij.c,v 1.312 2000/04/09 04:36:04 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 extern int MatSetUpMultiply_MPIAIJ(Mat); 8 extern int DisAssemble_MPIAIJ(Mat); 9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 12 extern int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int*)PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 
      else              low  = t; \
    } \
    /* linear scan of the (short) remaining range located by the bisection above */ \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* nonew == 1: silently ignore new nonzeros; nonew == -1: error on new nonzeros */ \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one buffer holding a (Scalar), j (int) and i (int) arrays */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows after the current one shift by CHUNKSIZE */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<a->m+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      /* repoint both the struct fields and the local working views at the new buffers */ \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<b->m+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = 
PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

/*
   MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values
   into a parallel AIJ matrix.  Values in locally owned rows are folded
   directly into the diagonal (A) or off-diagonal (B) sequential blocks via
   the insertion macros above; values destined for rows owned by other
   processors are queued in the stash and communicated at assembly time.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Scalar     value;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;
  /* zero values may be dropped when adding (never when inserting, which must overwrite) */
  PetscTruth ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ?
PETSC_TRUE:PETSC_FALSE);
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  /* scratch variables used inside the insertion macros */
  int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative rows are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row owned by this processor */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          /* column within the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;   /* negative columns are silently ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column belongs to the off-diagonal block */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;   /* -1 signals: column not yet present in B */
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column: fall back to global numbering until reassembly */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
*/
        }
      }
    } else {
      /* row owned by another processor: queue in stash for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatGetValues_MPIAIJ - Retrieves a logically dense block of values from a
   parallel AIJ matrix.  Only locally owned rows may be queried; entries in
   off-diagonal columns that are not stored locally are returned as 0.0.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal-block column */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global -> local B numbering via colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored locally (or stale colmap entry): value is structurally zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a
name=""></a>*/"MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - Starts assembly: verifies that all processors
   agree on INSERT_VALUES vs ADD_VALUES and launches the scatter of stashed
   off-processor values.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash into local
   storage, assembles the A and B blocks, and (re)builds the communication
   structures for matrix-vector products.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed values from other processors, one message at a time */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr =
MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter used by MatMult et al. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* drop the MatGetRow() work buffer; row lengths may have changed */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
   MatZeroEntries_MPIAIJ - Zeros all stored values (structure is retained)
   in both the diagonal and off-diagonal blocks.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatZeroRows_MPIAIJ - Zeros the (globally numbered) rows listed in is,
   optionally placing *diag on the diagonal.  Each processor may list any
   global rows; the lists are routed to the owning processors via MPI and
   then applied locally.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;   /* procs[j] = 1 iff we send anything to processor j */
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* linear search over the ownership ranges to find who owns global row idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];   /* number of messages this processor will receive */
  nmax   = work[rank];        /* length of the longest incoming message */
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination (starts[] is consumed here...) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* ...and rebuilt here before posting the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];   /* first global row owned by this processor */

  /*  wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row numbering */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the seq MatZeroRows can set the diagonal directly */
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
   MatMult_MPIAIJ - y = A*x.  Overlaps the scatter of the needed ghost
   values of x with the local (diagonal block) product, then adds in the
   off-diagonal contribution.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr =
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultAdd_MPIAIJ - z = y + A*x, using the same scatter/compute overlap
   as MatMult_MPIAIJ.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultTranspose_MPIAIJ - y = A'*x.  The off-diagonal contribution is
   computed into the ghost vector first and scattered back (reverse, adding)
   while the local transpose product runs.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultTransposeAdd_MPIAIJ - z = y + A'*x, same communication pattern
   as MatMultTranspose_MPIAIJ.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  /* the global diagonal lies entirely within the local diagonal block */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatScale_MPIAIJ - Scales the entire matrix by *aa (both blocks).
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatDestroy_MPIAIJ - Frees all storage owned by the parallel matrix:
   mappings, maps, the stash, both sequential blocks, the column map,
   the ghost vector and its scatter, and the header itself.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

/*
   MatView_MPIAIJ_Binary - Binary viewing; only the uniprocessor case is
   handled (delegates to the sequential A block).
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a
name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Viewing for ASCII/draw/socket
   viewers.  INFO formats print per-processor statistics; otherwise the
   whole matrix is gathered onto the first processor and viewed there.
*/
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth  isdraw,isascii,flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* print per-processor storage statistics and the scatter layout */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                             rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                             rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = aij->M,N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    /* rank 0 holds all M rows of the gathered copy; everyone else holds none */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily convert local column indices to global (undone below) */
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* translate B's compressed column indices to global numbering via garray */
    ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatView_MPIAIJ - Dispatches viewing of a parallel AIJ matrix to the
   viewer-type-specific routine; only ASCII, draw, binary and socket
   viewers are supported.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
   This has to provide several versions.

    2) a) use only local smoothing updating outer values only once.
       b) local smoothing updating outer values each inner iteration
    3) color updating out values between colors.
864 */ 865 #undef __FUNC__ 866 #define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ" 867 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag, 868 PetscReal fshift,int its,Vec xx) 869 { 870 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 871 Mat AA = mat->A,BB = mat->B; 872 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 873 Scalar *b,*x,*xs,*ls,d,*v,sum; 874 int ierr,*idx,*diag; 875 int n = mat->n,m = mat->m,i,shift = A->indexshift; 876 877 PetscFunctionBegin; 878 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 879 diag = A->diag; 880 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 881 if (flag & SOR_ZERO_INITIAL_GUESS) { 882 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 886 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 887 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 888 if (xx != bb) { 889 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 890 } else { 891 b = x; 892 } 893 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 894 xs = x + shift; /* shift by one for index start of 1 */ 895 ls = ls + shift; 896 while (its--) { 897 /* go down through the rows */ 898 for (i=0; i<m; i++) { 899 n = A->i[i+1] - A->i[i]; 900 PLogFlops(4*n+3); 901 idx = A->j + A->i[i] + shift; 902 v = A->a + A->i[i] + shift; 903 sum = b[i]; 904 SPARSEDENSEMDOT(sum,xs,v,idx,n); 905 d = fshift + A->a[diag[i]+shift]; 906 n = B->i[i+1] - B->i[i]; 907 idx = B->j + B->i[i] + shift; 908 v = B->a + B->i[i] + shift; 909 SPARSEDENSEMDOT(sum,ls,v,idx,n); 910 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 911 } 912 /* come up through the rows */ 913 for (i=m-1; i>-1; i--) { 914 n = A->i[i+1] - A->i[i]; 915 PLogFlops(4*n+3) 916 idx = A->j + A->i[i] + shift; 917 v = A->a + A->i[i] + shift; 918 sum = b[i]; 919 SPARSEDENSEMDOT(sum,xs,v,idx,n); 920 d = fshift + A->a[diag[i]+shift]; 921 n = B->i[i+1] - B->i[i]; 922 idx = B->j + B->i[i] + shift; 923 v = B->a + B->i[i] + shift; 924 SPARSEDENSEMDOT(sum,ls,v,idx,n); 925 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 926 } 927 } 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 930 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 931 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 932 if (flag & SOR_ZERO_INITIAL_GUESS) { 933 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 934 PetscFunctionReturn(0); 935 } 936 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 937 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 938 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 939 if (xx != bb) { 940 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 941 } else { 942 b = x; 943 } 944 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 945 xs = x + shift; /* shift by one for index start of 1 */ 946 ls = ls + shift; 947 while (its--) { 948 for (i=0; i<m; i++) { 949 n = A->i[i+1] - A->i[i]; 950 PLogFlops(4*n+3); 951 idx = A->j + A->i[i] + shift; 952 v = A->a + A->i[i] + shift; 953 sum = b[i]; 954 SPARSEDENSEMDOT(sum,xs,v,idx,n); 955 d = fshift + A->a[diag[i]+shift]; 956 n = B->i[i+1] - B->i[i]; 957 idx = B->j + B->i[i] + shift; 958 v = B->a + B->i[i] + shift; 959 SPARSEDENSEMDOT(sum,ls,v,idx,n); 960 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 961 } 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 965 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 966 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 967 if (flag & SOR_ZERO_INITIAL_GUESS) { 968 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 972 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 973 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 974 if (xx != bb) { 975 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 976 } else { 977 b = x; 978 } 979 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 980 xs = x + shift; /* shift by one for index start of 1 */ 981 ls = ls + shift; 982 while (its--) { 983 for (i=m-1; i>-1; i--) { 984 n = A->i[i+1] - A->i[i]; 985 PLogFlops(4*n+3); 986 idx = A->j + A->i[i] + shift; 987 v = A->a + A->i[i] + shift; 988 sum = b[i]; 989 SPARSEDENSEMDOT(sum,xs,v,idx,n); 990 d = fshift + A->a[diag[i]+shift]; 991 n = B->i[i+1] - B->i[i]; 992 idx = B->j + B->i[i] + shift; 993 v = B->a + B->i[i] + shift; 994 SPARSEDENSEMDOT(sum,ls,v,idx,n); 995 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 996 } 997 } 998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 999 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 1000 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1001 } else { 1002 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNC__ 1008 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ" 1009 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1010 { 1011 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1012 Mat A = mat->A,B = mat->B; 1013 int ierr; 1014 PetscReal isend[5],irecv[5]; 1015 1016 PetscFunctionBegin; 1017 info->block_size = 1.0; 1018 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1019 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1020 isend[3] = info->memory; isend[4] = info->mallocs; 1021 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1022 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1023 isend[3] += info->memory; isend[4] += info->mallocs; 1024 if (flag == MAT_LOCAL) { 1025 info->nz_used = isend[0]; 1026 info->nz_allocated = isend[1]; 1027 info->nz_unneeded = isend[2]; 1028 info->memory = isend[3]; 1029 info->mallocs = isend[4]; 1030 } else if (flag == MAT_GLOBAL_MAX) { 1031 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1032 info->nz_used = irecv[0]; 1033 info->nz_allocated = irecv[1]; 1034 info->nz_unneeded = irecv[2]; 1035 info->memory = irecv[3]; 1036 info->mallocs = irecv[4]; 1037 } else if (flag == MAT_GLOBAL_SUM) { 1038 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1039 info->nz_used = irecv[0]; 1040 info->nz_allocated = irecv[1]; 1041 info->nz_unneeded = irecv[2]; 1042 info->memory = irecv[3]; 1043 info->mallocs = irecv[4]; 1044 } 1045 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1046 
info->fill_ratio_needed = 0; 1047 info->factor_mallocs = 0; 1048 info->rows_global = (double)mat->M; 1049 info->columns_global = (double)mat->N; 1050 info->rows_local = (double)mat->m; 1051 info->columns_local = (double)mat->N; 1052 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNC__ 1057 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ" 1058 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1061 int ierr; 1062 1063 PetscFunctionBegin; 1064 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1065 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1066 op == MAT_COLUMNS_UNSORTED || 1067 op == MAT_COLUMNS_SORTED || 1068 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1069 op == MAT_KEEP_ZEROED_ROWS || 1070 op == MAT_NEW_NONZERO_LOCATION_ERR || 1071 op == MAT_USE_INODES || 1072 op == MAT_DO_NOT_USE_INODES || 1073 op == MAT_IGNORE_ZERO_ENTRIES) { 1074 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1075 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1076 } else if (op == MAT_ROW_ORIENTED) { 1077 a->roworiented = PETSC_TRUE; 1078 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1079 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1080 } else if (op == MAT_ROWS_SORTED || 1081 op == MAT_ROWS_UNSORTED || 1082 op == MAT_SYMMETRIC || 1083 op == MAT_STRUCTURALLY_SYMMETRIC || 1084 op == MAT_YES_NEW_DIAGONALS) { 1085 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1086 } else if (op == MAT_COLUMN_ORIENTED) { 1087 a->roworiented = PETSC_FALSE; 1088 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1089 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1090 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1091 a->donotstash = PETSC_TRUE; 1092 } else if (op == MAT_NO_NEW_DIAGONALS){ 1093 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1094 } else { 1095 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1096 } 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNC__ 1101 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIAIJ" 1102 int MatGetSize_MPIAIJ(Mat matin,int *m,int 
*n) 1103 { 1104 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1105 1106 PetscFunctionBegin; 1107 if (m) *m = mat->M; 1108 if (n) *n = mat->N; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNC__ 1113 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIAIJ" 1114 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1115 { 1116 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1117 1118 PetscFunctionBegin; 1119 if (m) *m = mat->m; 1120 if (n) *n = mat->n; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNC__ 1125 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ" 1126 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1127 { 1128 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1129 1130 PetscFunctionBegin; 1131 if (m) *m = mat->rstart; 1132 if (n) *n = mat->rend; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNC__ 1137 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ" 1138 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1139 { 1140 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1141 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1142 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1143 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1144 int *cmap,*idx_p; 1145 1146 PetscFunctionBegin; 1147 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1148 mat->getrowactive = PETSC_TRUE; 1149 1150 if (!mat->rowvalues && (idx || v)) { 1151 /* 1152 allocate enough space to hold information from the longest row. 
1153 */ 1154 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1155 int max = 1,m = mat->m,tmp; 1156 for (i=0; i<m; i++) { 1157 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1158 if (max < tmp) { max = tmp; } 1159 } 1160 mat->rowvalues = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1161 mat->rowindices = (int*)(mat->rowvalues + max); 1162 } 1163 1164 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1165 lrow = row - rstart; 1166 1167 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1168 if (!v) {pvA = 0; pvB = 0;} 1169 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1170 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1171 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1172 nztot = nzA + nzB; 1173 1174 cmap = mat->garray; 1175 if (v || idx) { 1176 if (nztot) { 1177 /* Sort by increasing column numbers, assuming A and B already sorted */ 1178 int imark = -1; 1179 if (v) { 1180 *v = v_p = mat->rowvalues; 1181 for (i=0; i<nzB; i++) { 1182 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1183 else break; 1184 } 1185 imark = i; 1186 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1187 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1188 } 1189 if (idx) { 1190 *idx = idx_p = mat->rowindices; 1191 if (imark > -1) { 1192 for (i=0; i<imark; i++) { 1193 idx_p[i] = cmap[cworkB[i]]; 1194 } 1195 } else { 1196 for (i=0; i<nzB; i++) { 1197 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1198 else break; 1199 } 1200 imark = i; 1201 } 1202 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1203 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1204 } 1205 } else { 1206 if (idx) *idx = 0; 1207 if (v) *v = 0; 1208 } 1209 } 1210 *nz = nztot; 1211 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1212 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1213 
PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNC__ 1217 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ" 1218 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1219 { 1220 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1221 1222 PetscFunctionBegin; 1223 if (aij->getrowactive == PETSC_FALSE) { 1224 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1225 } 1226 aij->getrowactive = PETSC_FALSE; 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNC__ 1231 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ" 1232 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1233 { 1234 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1235 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1236 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1237 PetscReal sum = 0.0; 1238 Scalar *v; 1239 1240 PetscFunctionBegin; 1241 if (aij->size == 1) { 1242 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1243 } else { 1244 if (type == NORM_FROBENIUS) { 1245 v = amat->a; 1246 for (i=0; i<amat->nz; i++) { 1247 #if defined(PETSC_USE_COMPLEX) 1248 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1249 #else 1250 sum += (*v)*(*v); v++; 1251 #endif 1252 } 1253 v = bmat->a; 1254 for (i=0; i<bmat->nz; i++) { 1255 #if defined(PETSC_USE_COMPLEX) 1256 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1257 #else 1258 sum += (*v)*(*v); v++; 1259 #endif 1260 } 1261 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1262 *norm = sqrt(*norm); 1263 } else if (type == NORM_1) { /* max column norm */ 1264 PetscReal *tmp,*tmp2; 1265 int *jj,*garray = aij->garray; 1266 tmp = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp); 1267 tmp2 = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2); 1268 ierr = PetscMemzero(tmp,aij->N*sizeof(PetscReal));CHKERRQ(ierr); 1269 *norm = 0.0; 1270 v = amat->a; jj = amat->j; 1271 for (j=0; j<amat->nz; j++) { 1272 tmp[cstart + *jj++ + shift] += 
PetscAbsScalar(*v); v++; 1273 } 1274 v = bmat->a; jj = bmat->j; 1275 for (j=0; j<bmat->nz; j++) { 1276 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1277 } 1278 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1279 for (j=0; j<aij->N; j++) { 1280 if (tmp2[j] > *norm) *norm = tmp2[j]; 1281 } 1282 ierr = PetscFree(tmp);CHKERRQ(ierr); 1283 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1284 } else if (type == NORM_INFINITY) { /* max row norm */ 1285 PetscReal ntemp = 0.0; 1286 for (j=0; j<amat->m; j++) { 1287 v = amat->a + amat->i[j] + shift; 1288 sum = 0.0; 1289 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1290 sum += PetscAbsScalar(*v); v++; 1291 } 1292 v = bmat->a + bmat->i[j] + shift; 1293 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1294 sum += PetscAbsScalar(*v); v++; 1295 } 1296 if (sum > ntemp) ntemp = sum; 1297 } 1298 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1299 } else { 1300 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1301 } 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNC__ 1307 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIAIJ" 1308 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1309 { 1310 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1311 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1312 int ierr,shift = Aloc->indexshift; 1313 int M = a->M,N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1314 Mat B; 1315 Scalar *array; 1316 1317 PetscFunctionBegin; 1318 if (!matout && M != N) { 1319 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1320 } 1321 1322 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1323 1324 /* copy over the A part */ 1325 Aloc = (Mat_SeqAIJ*)a->A->data; 1326 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1327 row = a->rstart; 1328 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1329 for (i=0; i<m; i++) { 1330 ierr = 
MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1331 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1332 } 1333 aj = Aloc->j; 1334 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1335 1336 /* copy over the B part */ 1337 Aloc = (Mat_SeqAIJ*)a->B->data; 1338 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1339 row = a->rstart; 1340 ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols); 1341 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1342 for (i=0; i<m; i++) { 1343 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1344 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1345 } 1346 ierr = PetscFree(ct);CHKERRQ(ierr); 1347 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1348 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1349 if (matout) { 1350 *matout = B; 1351 } else { 1352 PetscOps *Abops; 1353 struct _MatOps *Aops; 1354 1355 /* This isn't really an in-place transpose .... but free data structures from a */ 1356 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1357 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1358 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1359 #if defined (PETSC_USE_CTABLE) 1360 if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);} 1361 #else 1362 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1363 #endif 1364 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1365 if (a->lvec) {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);} 1366 if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);} 1367 ierr = PetscFree(a);CHKERRQ(ierr); 1368 1369 /* 1370 This is horrible, horrible code. We need to keep the 1371 A pointers for the bops and ops but copy everything 1372 else from C. 
1373 */ 1374 Abops = A->bops; 1375 Aops = A->ops; 1376 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1377 A->bops = Abops; 1378 A->ops = Aops; 1379 PetscHeaderDestroy(B); 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNC__ 1385 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ" 1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1387 { 1388 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1389 Mat a = aij->A,b = aij->B; 1390 int ierr,s1,s2,s3; 1391 1392 PetscFunctionBegin; 1393 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1394 if (rr) { 1395 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1396 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1397 /* Overlap communication with computation. */ 1398 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1399 } 1400 if (ll) { 1401 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1402 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1403 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1404 } 1405 /* scale the diagonal block */ 1406 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1407 1408 if (rr) { 1409 /* Do a scatter end and then right scale the off-diagonal block */ 1410 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1411 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1412 } 1413 1414 PetscFunctionReturn(0); 1415 } 1416 1417 1418 #undef __FUNC__ 1419 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ" 1420 int MatPrintHelp_MPIAIJ(Mat A) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 int ierr; 1424 1425 PetscFunctionBegin; 1426 if (!a->rank) { 1427 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNC__ 1433 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ" 1434 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1435 { 1436 
PetscFunctionBegin; 1437 *bs = 1; 1438 PetscFunctionReturn(0); 1439 } 1440 #undef __FUNC__ 1441 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ" 1442 int MatSetUnfactored_MPIAIJ(Mat A) 1443 { 1444 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1445 int ierr; 1446 1447 PetscFunctionBegin; 1448 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNC__ 1453 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ" 1454 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1455 { 1456 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1457 Mat a,b,c,d; 1458 PetscTruth flg; 1459 int ierr; 1460 1461 PetscFunctionBegin; 1462 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1463 a = matA->A; b = matA->B; 1464 c = matB->A; d = matB->B; 1465 1466 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1467 if (flg == PETSC_TRUE) { 1468 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1469 } 1470 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNC__ 1475 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ" 1476 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1477 { 1478 int ierr; 1479 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1480 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1481 1482 PetscFunctionBegin; 1483 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1484 /* because of the column compression in the off-processor part of the matrix a->B, 1485 the number of columns in a->B and b->B may be different, hence we cannot call 1486 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1487 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1488 then copying the submatrices */ 1489 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1490 } else { 1491 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1492 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1493 } 1494 PetscFunctionReturn(0); 1495 } 1496 1497 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1498 extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1499 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1500 extern int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1501 extern int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1502 1503 /* -------------------------------------------------------------------*/ 1504 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1505 MatGetRow_MPIAIJ, 1506 MatRestoreRow_MPIAIJ, 1507 MatMult_MPIAIJ, 1508 MatMultAdd_MPIAIJ, 1509 MatMultTranspose_MPIAIJ, 1510 MatMultTransposeAdd_MPIAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 MatRelax_MPIAIJ, 1518 MatTranspose_MPIAIJ, 1519 MatGetInfo_MPIAIJ, 1520 MatEqual_MPIAIJ, 1521 MatGetDiagonal_MPIAIJ, 1522 MatDiagonalScale_MPIAIJ, 1523 MatNorm_MPIAIJ, 1524 MatAssemblyBegin_MPIAIJ, 1525 MatAssemblyEnd_MPIAIJ, 1526 0, 1527 MatSetOption_MPIAIJ, 1528 MatZeroEntries_MPIAIJ, 1529 MatZeroRows_MPIAIJ, 1530 0, 1531 0, 1532 0, 1533 0, 1534 MatGetSize_MPIAIJ, 1535 MatGetLocalSize_MPIAIJ, 1536 MatGetOwnershipRange_MPIAIJ, 1537 0, 1538 0, 1539 0, 1540 0, 1541 MatDuplicate_MPIAIJ, 1542 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 MatGetSubMatrices_MPIAIJ, 1548 MatIncreaseOverlap_MPIAIJ, 1549 MatGetValues_MPIAIJ, 1550 MatCopy_MPIAIJ, 1551 MatPrintHelp_MPIAIJ, 1552 MatScale_MPIAIJ, 1553 0, 1554 0, 1555 0, 1556 MatGetBlockSize_MPIAIJ, 1557 0, 1558 0, 1559 0, 1560 0, 1561 MatFDColoringCreate_MPIAIJ, 1562 0, 1563 MatSetUnfactored_MPIAIJ, 1564 0, 1565 0, 1566 MatGetSubMatrix_MPIAIJ, 1567 0, 1568 0, 1569 
MatGetMaps_Petsc}; 1570 1571 /* ----------------------------------------------------------------------------------------*/ 1572 1573 EXTERN_C_BEGIN 1574 #undef __FUNC__ 1575 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ" 1576 int MatStoreValues_MPIAIJ(Mat mat) 1577 { 1578 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1579 int ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1583 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 EXTERN_C_END 1587 1588 EXTERN_C_BEGIN 1589 #undef __FUNC__ 1590 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ" 1591 int MatRetrieveValues_MPIAIJ(Mat mat) 1592 { 1593 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1594 int ierr; 1595 1596 PetscFunctionBegin; 1597 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1598 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 EXTERN_C_END 1602 1603 #include "pc.h" 1604 EXTERN_C_BEGIN 1605 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1606 EXTERN_C_END 1607 1608 #undef __FUNC__ 1609 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ" 1610 /*@C 1611 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1612 (the default parallel PETSc format). For good matrix assembly performance 1613 the user should preallocate the matrix storage by setting the parameters 1614 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1615 performance can be increased by more than a factor of 50. 1616 1617 Collective on MPI_Comm 1618 1619 Input Parameters: 1620 + comm - MPI communicator 1621 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1622 This value should be the same as the local size used in creating the 1623 y vector for the matrix-vector product y = Ax. 1624 . n - This value should be the same as the local size used in creating the 1625 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 1626 calculated if N is given) For square matrices n is almost always m. 1627 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1628 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1629 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1630 (same value is used for all local rows) 1631 . d_nnz - array containing the number of nonzeros in the various rows of the 1632 DIAGONAL portion of the local submatrix (possibly different for each row) 1633 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1634 The size of this array is equal to the number of local rows, i.e 'm'. 1635 You must leave room for the diagonal entry even if it is zero. 1636 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1637 submatrix (same value is used for all local rows). 1638 - o_nnz - array containing the number of nonzeros in the various rows of the 1639 OFF-DIAGONAL portion of the local submatrix (possibly different for 1640 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1641 structure. The size of this array is equal to the number 1642 of local rows, i.e 'm'. 1643 1644 Output Parameter: 1645 . A - the matrix 1646 1647 Notes: 1648 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1649 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1650 storage requirements for this matrix. 1651 1652 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1653 processor than it must be used on all processors that share the object for 1654 that argument. 1655 1656 The AIJ format (also called the Yale sparse matrix format or 1657 compressed row storage), is fully compatible with standard Fortran 77 1658 storage. That is, the stored row and column indices can begin at 1659 either one (as in Fortran) or zero. See the users manual for details. 
1660 1661 The user MUST specify either the local or global matrix dimensions 1662 (possibly both). 1663 1664 The parallel matrix is partitioned such that the first m0 rows belong to 1665 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1666 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1667 1668 The DIAGONAL portion of the local submatrix of a processor can be defined 1669 as the submatrix which is obtained by extraction the part corresponding 1670 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1671 first row that belongs to the processor, and r2 is the last row belonging 1672 to the this processor. This is a square mxm matrix. The remaining portion 1673 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1674 1675 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1676 1677 By default, this format uses inodes (identical nodes) when possible. 1678 We search for consecutive rows with the same nonzero structure, thereby 1679 reusing matrix information to achieve increased efficiency. 1680 1681 Options Database Keys: 1682 + -mat_aij_no_inode - Do not use inodes 1683 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1684 - -mat_aij_oneindex - Internally use indexing starting at 1 1685 rather than 0. Note that when calling MatSetValues(), 1686 the user still MUST index entries starting at 0! 1687 . -mat_mpi - use the parallel matrix data structures even on one processor 1688 (defaults to using SeqBAIJ format on one processor) 1689 . -mat_mpi - use the parallel matrix data structures even on one processor 1690 (defaults to using SeqAIJ format on one processor) 1691 1692 1693 Example usage: 1694 1695 Consider the following 8x8 matrix with 34 non-zero values, that is 1696 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1697 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row for each of the local DIAGONAL, and the OFF-DIAGONAL
   submatrices. In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr,i,size;
  PetscTruth flag1,flag2;

  PetscFunctionBegin;

  /* validate the preallocation arguments before doing any collective work */
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  /* on a single process (and unless the user forced the MPI format via the
     options database) a SeqAIJ matrix is created instead; the off-diagonal
     preallocation (o_nz/o_nnz) is irrelevant in that case and is ignored */
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  /* per-row preallocation requires knowing the local row count up front */
  if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  /* fill in any PETSC_DECIDE local/global sizes consistently across the communicator */
  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners and cowners
     share one allocation, cowners starting size+2 entries in */
  b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every rank's local row count, then prefix-sum in place so that
     rowners[p]..rowners[p+1] is the global row range owned by rank p */
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for the column ownership ranges */
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* create the local m x n diagonal block and the m x N off-diagonal block
     (the latter is compacted to its nonzero columns later, by MatSetUpMultiply) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries set before assembly */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply (created lazily at assembly time) */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register type-specific operations queryable by name */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ"
/*
   Duplicates an MPIAIJ matrix: builds a new header, copies the parallel
   layout (sizes, ownership ranges, colmap/garray) from matin, and duplicates
   the local diagonal and off-diagonal SeqAIJ blocks; values are copied or
   not according to cpvalues. The result is returned assembled.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr,len = 0;
  PetscTruth flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void*)(a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;   /* duplicate of an assembled matrix is assembled */
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the sizes and parallel layout of the original */
  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners/cowners share one allocation, same layout as in MatCreateMPIAIJ() */
  a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  /* copy the global-to-local column map, whichever representation is in use */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int*)PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* copy the list of global column numbers present in the off-diagonal block;
     its length is the (compacted) column count of oldmat->B */
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ*)(oldmat->B->data))->n;
    a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the scatter context and work vector used by MatMult() */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  /* duplicate the two local SeqAIJ blocks */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ"
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer. Rank 0 reads the
   header, row lengths, column indices and values from the file and ships
   each rank its slice with MPI_Scatterv/MPI_Send; the other ranks receive
   their portions. Rows are distributed nearly evenly (first M%size ranks
   get one extra row). Collective on the viewer's communicator.
   NOTE(review): the 'type' argument is not consulted here — presumably
   callers dispatch on it before reaching this routine; confirm upstream.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i,nz,ierr,j,rstart,rend,fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only rank 0 touches the file; header = [cookie, M, N, nz-flag] */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: near-even split, extras to low ranks */
  m = M/size + ((M % size) > rank);
  rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix-sum the counts in place so rowners[] holds row-range offsets */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share one
     allocation of 2*(rend-rstart) ints */
  ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily turn ourlens into diagonal-only counts
     so ourlens/offlens give exact d_nnz/o_nnz preallocation, then restore */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* file stores each row's columns sorted, so enable the faster insert path */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, one global row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix, one global row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ"
/*
    Extracts a parallel submatrix given per-process row/column index sets.

    Not great since it makes two copies of the submatrix, first a SeqAIJ
  locally and then, by concatenating the local matrices, the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    With MAT_REUSE_MATRIX the SeqAIJ copy cached on *newmat (composed under
  the name "SubMatrix" on the first call) is refilled instead of recreated.
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M,Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* refill the cached sequential submatrix from the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before,cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* the last rank's scan result must equal the total column count */
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; dlens and olens share one allocation */
    dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the rows of the sequential submatrix into the parallel matrix */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; the compose takes a
     reference so we drop ours, leaving M as the sole owner of Mreuse */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}