1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 
58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 
} \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = 
PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int 
nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if 
(ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 
333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 int i,j,rstart,ncols,n,ierr,flg; 354 int *row,*col,other_disassembled; 355 PetscScalar *val; 356 InsertMode addv = mat->insertmode; 357 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 358 PetscTruth flag; 359 #endif 360 361 PetscFunctionBegin; 362 if (!aij->donotstash) { 363 while (1) { 364 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 365 if (!flg) break; 366 367 for (i=0; i<n;) { 368 /* Now identify the consecutive vals belonging to the same row */ 369 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 370 if (j < n) ncols = j-i; 371 else ncols = n-i; 372 /* Now assemble all these values with a single function call */ 373 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 374 i = j; 375 } 376 } 377 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 378 } 379 380 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 381 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 382 383 /* determine if any processor has disassembled, if so we must 384 also disassemble ourselfs, in order that we may reassemble. 
*/ 385 /* 386 if nonzero structure of submatrix B cannot change then we know that 387 no processor disassembled thus we can skip this stuff 388 */ 389 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 390 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 391 if (mat->was_assembled && !other_disassembled) { 392 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 } 395 396 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 397 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 398 } 399 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 401 402 if (aij->rowvalues) { 403 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 404 aij->rowvalues = 0; 405 } 406 #if defined(PETSC_HAVE_SUPERLUDIST) 407 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 408 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 409 #endif 410 411 #if defined(PETSC_HAVE_SPOOLES) 412 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 413 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 414 #endif 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 420 int MatZeroEntries_MPIAIJ(Mat A) 421 { 422 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 423 int ierr; 424 425 PetscFunctionBegin; 426 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 427 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatZeroRows_MPIAIJ" 433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 434 { 435 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 436 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 437 int *procs,*nprocs,j,idx,nsends,*work,row; 438 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 439 int *rvalues,tag = A->tag,count,base,slen,n,*source; 440 int *lens,imdex,*lrows,*values,rstart=l->rstart; 441 
MPI_Comm comm = A->comm; 442 MPI_Request *send_waits,*recv_waits; 443 MPI_Status recv_status,*send_status; 444 IS istmp; 445 PetscTruth found; 446 447 PetscFunctionBegin; 448 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 449 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 450 451 /* first count number of contributors to each processor */ 452 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 453 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 454 procs = nprocs + size; 455 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 456 for (i=0; i<N; i++) { 457 idx = rows[i]; 458 found = PETSC_FALSE; 459 for (j=0; j<size; j++) { 460 if (idx >= owners[j] && idx < owners[j+1]) { 461 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 462 } 463 } 464 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 465 } 466 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 467 468 /* inform other processors of number of messages and max length*/ 469 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 470 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 471 nrecvs = work[size+rank]; 472 nmax = work[rank]; 473 ierr = PetscFree(work);CHKERRQ(ierr); 474 475 /* post receives: */ 476 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 477 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 478 for (i=0; i<nrecvs; i++) { 479 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 480 } 481 482 /* do sends: 483 1) starts[i] gives the starting index in svalues for stuff going to 484 the ith processor 485 */ 486 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 487 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 488 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 489 starts[0] = 0; 490 for (i=1; i<size; i++) { starts[i] = 
starts[i-1] + nprocs[i-1];} 491 for (i=0; i<N; i++) { 492 svalues[starts[owner[i]]++] = rows[i]; 493 } 494 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 495 496 starts[0] = 0; 497 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 498 count = 0; 499 for (i=0; i<size; i++) { 500 if (procs[i]) { 501 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 502 } 503 } 504 ierr = PetscFree(starts);CHKERRQ(ierr); 505 506 base = owners[rank]; 507 508 /* wait on receives */ 509 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 510 source = lens + nrecvs; 511 count = nrecvs; slen = 0; 512 while (count) { 513 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 514 /* unpack receives into our local space */ 515 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 516 source[imdex] = recv_status.MPI_SOURCE; 517 lens[imdex] = n; 518 slen += n; 519 count--; 520 } 521 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 522 523 /* move the data into the send scatter */ 524 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 525 count = 0; 526 for (i=0; i<nrecvs; i++) { 527 values = rvalues + i*nmax; 528 for (j=0; j<lens[i]; j++) { 529 lrows[count++] = values[j] - base; 530 } 531 } 532 ierr = PetscFree(rvalues);CHKERRQ(ierr); 533 ierr = PetscFree(lens);CHKERRQ(ierr); 534 ierr = PetscFree(owner);CHKERRQ(ierr); 535 ierr = PetscFree(nprocs);CHKERRQ(ierr); 536 537 /* actually zap the local rows */ 538 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 539 PetscLogObjectParent(A,istmp); 540 541 /* 542 Zero the required rows. If the "diagonal block" of the matrix 543 is square and the user wishes to set the diagonal we use seperate 544 code so that MatSetValues() is not called for each diagonal allocating 545 new memory, thus calling lots of mallocs and slowing things down. 
546 547 Contributed by: Mathew Knepley 548 */ 549 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 550 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 551 if (diag && (l->A->M == l->A->N)) { 552 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 553 } else if (diag) { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 556 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 557 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 558 } 559 for (i = 0; i < slen; i++) { 560 row = lrows[i] + rstart; 561 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 562 } 563 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 564 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 565 } else { 566 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 567 } 568 ierr = ISDestroy(istmp);CHKERRQ(ierr); 569 ierr = PetscFree(lrows);CHKERRQ(ierr); 570 571 /* wait on sends */ 572 if (nsends) { 573 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 574 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 575 ierr = PetscFree(send_status);CHKERRQ(ierr); 576 } 577 ierr = PetscFree(send_waits);CHKERRQ(ierr); 578 ierr = PetscFree(svalues);CHKERRQ(ierr); 579 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMult_MPIAIJ" 585 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 586 { 587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 588 int ierr,nt; 589 590 PetscFunctionBegin; 591 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 592 if (nt != A->n) { 593 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 594 } 595 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 596 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 597 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 599 PetscFunctionReturn(0); 600 } 601 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatMultAdd_MPIAIJ" 604 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 605 { 606 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 607 int ierr; 608 609 PetscFunctionBegin; 610 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 611 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 612 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 613 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 617 #undef __FUNCT__ 618 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 619 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 620 { 621 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 622 int ierr; 623 624 PetscFunctionBegin; 625 /* do nondiagonal part */ 626 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 627 /* send it on its way */ 628 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 629 /* do local part */ 630 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 631 /* receive remote parts: note this assumes the values are not actually */ 632 /* inserted in yy until the next line, which is true for my implementation*/ 633 /* but is not perhaps always true. 
*/ 634 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 635 PetscFunctionReturn(0); 636 } 637 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 640 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 641 { 642 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 643 int ierr; 644 645 PetscFunctionBegin; 646 /* do nondiagonal part */ 647 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 648 /* send it on its way */ 649 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 650 /* do local part */ 651 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 652 /* receive remote parts: note this assumes the values are not actually */ 653 /* inserted in yy until the next line, which is true for my implementation*/ 654 /* but is not perhaps always true. */ 655 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 656 PetscFunctionReturn(0); 657 } 658 659 /* 660 This only works correctly for square matrices where the subblock A->A is the 661 diagonal block 662 */ 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 665 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 666 { 667 int ierr; 668 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 669 670 PetscFunctionBegin; 671 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 672 if (a->rstart != a->cstart || a->rend != a->cend) { 673 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 674 } 675 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatScale_MPIAIJ" 681 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 682 { 683 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 684 int ierr; 685 686 PetscFunctionBegin; 687 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 688 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 #undef __FUNCT__ 693 #define 
__FUNCT__ "MatDestroy_MPIAIJ" 694 int MatDestroy_MPIAIJ(Mat mat) 695 { 696 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 697 int ierr; 698 699 PetscFunctionBegin; 700 #if defined(PETSC_USE_LOG) 701 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 702 #endif 703 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 704 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 705 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 706 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 707 #if defined (PETSC_USE_CTABLE) 708 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 709 #else 710 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 711 #endif 712 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 713 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 714 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 715 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 716 ierr = PetscFree(aij);CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 720 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 721 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 725 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 728 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 729 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 730 PetscTruth isdraw,isascii,flg; 731 PetscViewer sviewer; 732 PetscViewerFormat format; 733 734 PetscFunctionBegin; 735 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 736 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 737 if (isascii) { 738 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 739 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 740 MatInfo info; 741 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 742 ierr = 
MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 743 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 744 if (flg) { 745 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 746 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 747 } else { 748 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 749 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 750 } 751 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 752 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 753 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 754 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 755 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 756 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } else if (format == PETSC_VIEWER_ASCII_INFO) { 759 PetscFunctionReturn(0); 760 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 761 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 762 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 763 #endif 764 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 765 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 766 #endif 767 768 PetscFunctionReturn(0); 769 } 770 } else if (isdraw) { 771 PetscDraw draw; 772 PetscTruth isnull; 773 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 774 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 775 } 776 777 if (size == 1) { 778 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 779 ierr = 
MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    /* rank 0 holds the whole matrix; every other process contributes 0 local rows */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A (diagonal-block) part; column indices are temporarily
       shifted to global numbering, then restored below */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* undo the in-place shift so aij->A is left unmodified */
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B (off-diagonal) part, mapping compressed local columns
       back to global columns through garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;   /* keep original pointer for PetscFree(); cols is advanced below */
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/*
   Viewer dispatch for MPIAIJ matrices: ASCII, draw, binary and socket
   viewers are all routed to MatView_MPIAIJ_ASCIIorDraworSocket(); any
   other viewer type is an error.
*/
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   Block Jacobi / local SOR relaxation: each outer iteration scatters the
   current solution to the ghost vector, forms bb1 = bb - B*x (off-diagonal
   contribution moved to the right-hand side), then runs the requested local
   sweep on the diagonal block A.  True parallel (global) SOR is not
   supported.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-diagonal product is zero, so the first
         sweep needs no scatter/update of the right-hand side */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x (lvec negated, then multadd) */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL is passed where the int 'lits' argument is
         expected (cf. the symmetric branch above, which passes lits,lits) —
         confirm this is intended and not a typo */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   Collects matrix statistics: sums the local info of the diagonal (A) and
   off-diagonal (B) blocks, then reduces across the communicator with
   MPI_MAX or MPI_SUM according to 'flag'.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  /* accumulate A's counts, then add B's on top */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  /* NOTE(review): columns_local is set from the GLOBAL column count matin->N
     while rows_local uses the local matin->m — confirm this is intentional */
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/* Applies a MatOption to the MPIAIJ matrix, forwarding to both sequential blocks where relevant. */
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ
*a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both the diagonal (A) and off-diagonal (B) blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally-owned row in global column numbering, merging the
   diagonal-block (A) and off-diagonal-block (B) entries into a single
   sorted list held in the per-matrix work arrays rowvalues/rowindices.
   Only rows in [rstart,rend) may be requested; must be paired with
   MatRestoreRow_MPIAIJ().
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
        (one shared buffer: values first, then indices aliased after them)
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* pass NULL for whichever of values/indices the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted.
         imark = number of B entries whose global column precedes cstart;
         layout is then: B-entries < cstart | all A entries | B-entries >= cend */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* Companion to MatGetRow_MPIAIJ(); only clears the getrowactive flag
   (the work buffers are reused across calls and freed with the matrix). */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Frobenius, one- and infinity-norms of the parallel matrix, reduced over
   the communicator (SUM for Frobenius/1-norm column sums, MAX for the
   infinity norm).  The 2-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential block */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp accumulates per-global-column absolute sums, order-N storage */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
1165 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1166 } 1167 v = bmat->a; jj = bmat->j; 1168 for (j=0; j<bmat->nz; j++) { 1169 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1170 } 1171 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1172 for (j=0; j<mat->N; j++) { 1173 if (tmp2[j] > *norm) *norm = tmp2[j]; 1174 } 1175 ierr = PetscFree(tmp);CHKERRQ(ierr); 1176 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1177 } else if (type == NORM_INFINITY) { /* max row norm */ 1178 PetscReal ntemp = 0.0; 1179 for (j=0; j<aij->A->m; j++) { 1180 v = amat->a + amat->i[j] + shift; 1181 sum = 0.0; 1182 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1183 sum += PetscAbsScalar(*v); v++; 1184 } 1185 v = bmat->a + bmat->i[j] + shift; 1186 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1187 sum += PetscAbsScalar(*v); v++; 1188 } 1189 if (sum > ntemp) ntemp = sum; 1190 } 1191 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1192 } else { 1193 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1194 } 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatTranspose_MPIAIJ" 1201 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1205 int ierr,shift = Aloc->indexshift; 1206 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1207 Mat B; 1208 PetscScalar *array; 1209 1210 PetscFunctionBegin; 1211 if (!matout && M != N) { 1212 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1213 } 1214 1215 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1216 1217 /* copy over the A part */ 1218 Aloc = (Mat_SeqAIJ*)a->A->data; 1219 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1220 row = a->rstart; 1221 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1222 for (i=0; i<m; i++) { 1223 ierr = 
MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1224 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1225 } 1226 aj = Aloc->j; 1227 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1228 1229 /* copy over the B part */ 1230 Aloc = (Mat_SeqAIJ*)a->B->data; 1231 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1232 row = a->rstart; 1233 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1234 ct = cols; 1235 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1236 for (i=0; i<m; i++) { 1237 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1238 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1239 } 1240 ierr = PetscFree(ct);CHKERRQ(ierr); 1241 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1242 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1243 if (matout) { 1244 *matout = B; 1245 } else { 1246 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1253 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1256 Mat a = aij->A,b = aij->B; 1257 int ierr,s1,s2,s3; 1258 1259 PetscFunctionBegin; 1260 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1261 if (rr) { 1262 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1263 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1264 /* Overlap communication with computation. 
*/ 1265 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1266 } 1267 if (ll) { 1268 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1269 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1270 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1271 } 1272 /* scale the diagonal block */ 1273 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1274 1275 if (rr) { 1276 /* Do a scatter end and then right scale the off-diagonal block */ 1277 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1278 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1279 } 1280 1281 PetscFunctionReturn(0); 1282 } 1283 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1287 int MatPrintHelp_MPIAIJ(Mat A) 1288 { 1289 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1290 int ierr; 1291 1292 PetscFunctionBegin; 1293 if (!a->rank) { 1294 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1301 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1302 { 1303 PetscFunctionBegin; 1304 *bs = 1; 1305 PetscFunctionReturn(0); 1306 } 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1309 int MatSetUnfactored_MPIAIJ(Mat A) 1310 { 1311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1312 int ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatEqual_MPIAIJ" 1321 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1322 { 1323 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1324 Mat a,b,c,d; 1325 PetscTruth flg; 1326 int ierr; 1327 1328 PetscFunctionBegin; 1329 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1330 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1331 a = 
matA->A; b = matA->B; 1332 c = matB->A; d = matB->B; 1333 1334 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1335 if (flg == PETSC_TRUE) { 1336 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1337 } 1338 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCopy_MPIAIJ" 1344 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1345 { 1346 int ierr; 1347 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1348 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1349 PetscTruth flg; 1350 1351 PetscFunctionBegin; 1352 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1353 if (str != SAME_NONZERO_PATTERN || !flg) { 1354 /* because of the column compression in the off-processor part of the matrix a->B, 1355 the number of columns in a->B and b->B may be different, hence we cannot call 1356 the MatCopy() directly on the two parts. If need be, we can provide a more 1357 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1358 then copying the submatrices */ 1359 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1360 } else { 1361 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1362 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1369 int MatSetUpPreallocation_MPIAIJ(Mat A) 1370 { 1371 int ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1379 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1380 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1381 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1382 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1383 #if !defined(PETSC_USE_COMPLEX) && 
!defined(PETSC_USE_SINGLE) 1384 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1385 #endif 1386 1387 #include "petscblaslapack.h" 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatAXPY_MPIAIJ" 1391 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1392 { 1393 int ierr,one; 1394 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1395 Mat_SeqAIJ *x,*y; 1396 1397 PetscFunctionBegin; 1398 if (str == SAME_NONZERO_PATTERN) { 1399 x = (Mat_SeqAIJ *)xx->A->data; 1400 y = (Mat_SeqAIJ *)yy->A->data; 1401 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1402 x = (Mat_SeqAIJ *)xx->B->data; 1403 y = (Mat_SeqAIJ *)yy->B->data; 1404 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1405 } else { 1406 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 /* -------------------------------------------------------------------*/ 1412 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1413 MatGetRow_MPIAIJ, 1414 MatRestoreRow_MPIAIJ, 1415 MatMult_MPIAIJ, 1416 MatMultAdd_MPIAIJ, 1417 MatMultTranspose_MPIAIJ, 1418 MatMultTransposeAdd_MPIAIJ, 1419 0, 1420 0, 1421 0, 1422 0, 1423 0, 1424 0, 1425 MatRelax_MPIAIJ, 1426 MatTranspose_MPIAIJ, 1427 MatGetInfo_MPIAIJ, 1428 MatEqual_MPIAIJ, 1429 MatGetDiagonal_MPIAIJ, 1430 MatDiagonalScale_MPIAIJ, 1431 MatNorm_MPIAIJ, 1432 MatAssemblyBegin_MPIAIJ, 1433 MatAssemblyEnd_MPIAIJ, 1434 0, 1435 MatSetOption_MPIAIJ, 1436 MatZeroEntries_MPIAIJ, 1437 MatZeroRows_MPIAIJ, 1438 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1439 MatLUFactorSymbolic_MPIAIJ_TFS, 1440 #else 1441 0, 1442 #endif 1443 0, 1444 0, 1445 0, 1446 MatSetUpPreallocation_MPIAIJ, 1447 0, 1448 0, 1449 0, 1450 0, 1451 MatDuplicate_MPIAIJ, 1452 0, 1453 0, 1454 0, 1455 0, 1456 MatAXPY_MPIAIJ, 1457 MatGetSubMatrices_MPIAIJ, 1458 MatIncreaseOverlap_MPIAIJ, 1459 MatGetValues_MPIAIJ, 1460 MatCopy_MPIAIJ, 1461 MatPrintHelp_MPIAIJ, 1462 MatScale_MPIAIJ, 1463 0, 1464 0, 1465 0, 1466 
MatGetBlockSize_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatFDColoringCreate_MPIAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       0,
                                       0,
                                       MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       MatGetPetscMaps_Petsc,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       MatSetValuesAdic_MPIAIJ,
                                       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Saves the current numerical values of both sequential blocks (pairs with MatRetrieveValues). */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the values previously saved with MatStoreValues_MPIAIJ(). */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   Type constructor for MATMPIAIJ: allocates and zeroes the Mat_MPIAIJ
   context, installs the function table, splits row/column ownership over
   the communicator, builds the rowners/cowners ownership tables, sets up
   the off-process stash, and composes the StoreValues / RetrieveValues /
   GetDiagonalBlock dynamic methods.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships:
     rowners/cowners are prefix sums of each process's local sizes */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;   /* second half of the same allocation */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Deep-copies an assembled MPIAIJ matrix: ownership tables, column map,
   garray, ghost vector, scatter context and both sequential blocks
   (values copied or not according to 'cpvalues').
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners and cowners live in one allocation of 2*(size+2) ints */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr =
PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;   /* number of compressed off-diagonal columns */
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer.  Process 0 reads the
   header, the row lengths, and every process's column indices and values,
   shipping each slice to its owner with MPI_Send; all other processes
   receive their slice.  Row ownership is a near-even split of M; row
   lengths are distributed with MPI_Scatterv.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: near-even split, remainder to low ranks */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors (ourlens holds total row lengths,
     offlens — aliased into the same allocation — later holds off-diagonal counts) */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr =
PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square: column ownership mirrors row ownership */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily turn ourlens into diagonal-only counts
     for preallocation, then restore the total lengths afterwards */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
    in local and then by concatenating the local matrices the end result.
   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse path: the intermediate sequential submatrix was stashed on *newmat
       (as "SubMatrix") by a previous MAT_INITIAL_MATRIX call; refill it in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* first call: build a fresh sequential submatrix; we keep Mreuse and free
       only the array-of-matrices wrapper returned by MatGetSubMatrices() */
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible across the processors */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local column counts gives this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths: dlens[i]/olens[i] count the entries of row i
       falling inside/outside the [rstart,rend) "diagonal" column range */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the sequential submatrix row by row into the parallel matrix,
     walking the CSR arrays (ii,jj,aa) of Mreuse without extra copies */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; M takes ownership
     of Mreuse via the dereference after the compose */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  /* silently ignore matrices that are not of type MPIAIJ */
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  /* default per-row estimates: 5 diagonal, 2 off-diagonal nonzeros */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  /* validate the optional per-row arrays before touching any storage */
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* the diagonal block A is m x n (local); the off-diagonal block B is m x N
     (all non-local columns) -- both stored as sequential AIJ matrices */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.
2161 2162 Collective on MPI_Comm 2163 2164 Input Parameters: 2165 + comm - MPI communicator 2166 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2167 This value should be the same as the local size used in creating the 2168 y vector for the matrix-vector product y = Ax. 2169 . n - This value should be the same as the local size used in creating the 2170 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2171 calculated if N is given) For square matrices n is almost always m. 2172 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2173 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2174 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2175 (same value is used for all local rows) 2176 . d_nnz - array containing the number of nonzeros in the various rows of the 2177 DIAGONAL portion of the local submatrix (possibly different for each row) 2178 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2179 The size of this array is equal to the number of local rows, i.e 'm'. 2180 You must leave room for the diagonal entry even if it is zero. 2181 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2182 submatrix (same value is used for all local rows). 2183 - o_nnz - array containing the number of nonzeros in the various rows of the 2184 OFF-DIAGONAL portion of the local submatrix (possibly different for 2185 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2186 structure. The size of this array is equal to the number 2187 of local rows, i.e 'm'. 2188 2189 Output Parameter: 2190 . A - the matrix 2191 2192 Notes: 2193 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2194 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2195 storage requirements for this matrix. 
2196 2197 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2198 processor than it must be used on all processors that share the object for 2199 that argument. 2200 2201 The AIJ format (also called the Yale sparse matrix format or 2202 compressed row storage), is fully compatible with standard Fortran 77 2203 storage. That is, the stored row and column indices can begin at 2204 either one (as in Fortran) or zero. See the users manual for details. 2205 2206 The user MUST specify either the local or global matrix dimensions 2207 (possibly both). 2208 2209 The parallel matrix is partitioned such that the first m0 rows belong to 2210 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2211 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2212 2213 The DIAGONAL portion of the local submatrix of a processor can be defined 2214 as the submatrix which is obtained by extraction the part corresponding 2215 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2216 first row that belongs to the processor, and r2 is the last row belonging 2217 to the this processor. This is a square mxm matrix. The remaining portion 2218 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2219 2220 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2221 2222 By default, this format uses inodes (identical nodes) when possible. 2223 We search for consecutive rows with the same nonzero structure, thereby 2224 reusing matrix information to achieve increased efficiency. 2225 2226 Options Database Keys: 2227 + -mat_aij_no_inode - Do not use inodes 2228 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2229 - -mat_aij_oneindex - Internally use indexing starting at 1 2230 rather than 0. Note that when calling MatSetValues(), 2231 the user still MUST index entries starting at 0! 
2232 2233 2234 Example usage: 2235 2236 Consider the following 8x8 matrix with 34 non-zero values, that is 2237 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2238 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2239 as follows: 2240 2241 .vb 2242 1 2 0 | 0 3 0 | 0 4 2243 Proc0 0 5 6 | 7 0 0 | 8 0 2244 9 0 10 | 11 0 0 | 12 0 2245 ------------------------------------- 2246 13 0 14 | 15 16 17 | 0 0 2247 Proc1 0 18 0 | 19 20 21 | 0 0 2248 0 0 0 | 22 23 0 | 24 0 2249 ------------------------------------- 2250 Proc2 25 26 27 | 0 0 28 | 29 0 2251 30 0 0 | 31 32 33 | 0 34 2252 .ve 2253 2254 This can be represented as a collection of submatrices as: 2255 2256 .vb 2257 A B C 2258 D E F 2259 G H I 2260 .ve 2261 2262 Where the submatrices A,B,C are owned by proc0, D,E,F are 2263 owned by proc1, G,H,I are owned by proc2. 2264 2265 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2266 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2267 The 'M','N' parameters are 8,8, and have the same values on all procs. 2268 2269 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2270 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2271 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2272 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2273 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2274 matrix, ans [DF] as another SeqAIJ matrix. 2275 2276 When d_nz, o_nz parameters are specified, d_nz storage elements are 2277 allocated for every row of the local diagonal submatrix, and o_nz 2278 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2279 One way to choose d_nz and o_nz is to use the max nonzerors per local 2280 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2281 In this case, the values of d_nz,o_nz are: 2282 .vb 2283 proc0 : dnz = 2, o_nz = 2 2284 proc1 : dnz = 3, o_nz = 2 2285 proc2 : dnz = 1, o_nz = 4 2286 .ve 2287 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2288 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2289 for proc3. i.e we are using 12+15+10=37 storage locations to store 2290 34 values. 2291 2292 When d_nnz, o_nnz parameters are specified, the storage is specified 2293 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2294 In the above case the values for d_nnz,o_nnz are: 2295 .vb 2296 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2297 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2298 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2299 .ve 2300 Here the space allocated is sum of all the above values i.e 34, and 2301 hence pre-allocation is perfect. 2302 2303 Level: intermediate 2304 2305 .keywords: matrix, aij, compressed row, sparse, parallel 2306 2307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2308 @*/ 2309 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2310 { 2311 int ierr,size; 2312 2313 PetscFunctionBegin; 2314 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2315 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2316 if (size > 1) { 2317 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2318 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2319 } else { 2320 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2321 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2322 } 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2328 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2329 { 2330 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2331 PetscFunctionBegin; 2332 *Ad = a->A; 2333 *Ao = a->B; 2334 *colmap = a->garray; 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 
#define __FUNCT__ "MatSetColoring_MPIAIJ" 2340 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2341 { 2342 int ierr; 2343 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2344 2345 PetscFunctionBegin; 2346 if (coloring->ctype == IS_COLORING_LOCAL) { 2347 int *allcolors,*colors,i; 2348 ISColoring ocoloring; 2349 2350 /* set coloring for diagonal portion */ 2351 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2352 2353 /* set coloring for off-diagonal portion */ 2354 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2355 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2356 for (i=0; i<a->B->n; i++) { 2357 colors[i] = allcolors[a->garray[i]]; 2358 } 2359 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2360 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2361 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2362 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2363 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2364 int *colors,i,*larray; 2365 ISColoring ocoloring; 2366 2367 /* set coloring for diagonal portion */ 2368 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2369 for (i=0; i<a->A->n; i++) { 2370 larray[i] = i + a->cstart; 2371 } 2372 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2373 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2374 for (i=0; i<a->A->n; i++) { 2375 colors[i] = coloring->colors[larray[i]]; 2376 } 2377 ierr = PetscFree(larray);CHKERRQ(ierr); 2378 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2379 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2380 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2381 2382 /* set coloring for off-diagonal portion */ 2383 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2384 ierr = 
ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2385 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2386 for (i=0; i<a->B->n; i++) { 2387 colors[i] = coloring->colors[larray[i]]; 2388 } 2389 ierr = PetscFree(larray);CHKERRQ(ierr); 2390 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2391 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2392 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2393 } else { 2394 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2395 } 2396 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2402 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2403 { 2404 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2405 int ierr; 2406 2407 PetscFunctionBegin; 2408 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2409 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2415 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2416 { 2417 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2418 int ierr; 2419 2420 PetscFunctionBegin; 2421 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2422 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatFileMerge" 2428 /*@C 2429 MatFileMerge - Saves a single large PETSc matrix by reading in 2430 sequential matrices from each processor and concatinating them 2431 together. 2432 2433 Collective on MPI_Comm 2434 2435 Input Parameters: 2436 + comm - the communicators the individual files are on 2437 . 
infile - name of input file 2438 - outfile - name of output file 2439 2440 Level: advanced 2441 2442 Notes: The number of columns of the matrix in EACH of the seperate files 2443 MUST be the same. 2444 2445 The true name of the input files on each processor is obtained by 2446 appending .rank onto the infile string; where rank is 0 up to one less 2447 then the number of processors 2448 @*/ 2449 int MatFileMerge(MPI_Comm comm,char *infile,char *outfile) 2450 { 2451 int ierr,rank,len,m,n,i,rstart,*indx,nnz,I; 2452 PetscViewer in,out; 2453 char *name; 2454 Mat A,B; 2455 PetscScalar *values; 2456 2457 PetscFunctionBegin; 2458 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2459 ierr = PetscStrlen(infile,&len);CHKERRQ(ierr); 2460 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2461 sprintf(name,"%s.%d",infile,rank); 2462 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_RDONLY,&in);CHKERRQ(ierr); 2463 ierr = PetscFree(name); 2464 ierr = MatLoad(in,MATSEQAIJ,&A);CHKERRQ(ierr); 2465 ierr = PetscViewerDestroy(in);CHKERRQ(ierr); 2466 2467 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 2468 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DETERMINE,n,0,0,0,0,&B);CHKERRQ(ierr); 2469 ierr = MatGetOwnershipRange(B,&rstart,0);CHKERRQ(ierr); 2470 for (i=0;i<m;i++) { 2471 ierr = MatGetRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr); 2472 I = i + rstart; 2473 ierr = MatSetValues(B,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2474 ierr = MatRestoreRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr); 2475 } 2476 ierr = MatDestroy(A);CHKERRQ(ierr); 2477 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2478 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2479 2480 ierr = PetscViewerBinaryOpen(comm,outfile,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2481 ierr = MatView(B,out);CHKERRQ(ierr); 2482 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2483 ierr = MatDestroy(B);CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 #undef __FUNCT__ 2488 #define 
__FUNCT__ "MatFileSplit" 2489 int MatFileSplit(Mat A,char *outfile) 2490 { 2491 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2492 PetscViewer out; 2493 char *name; 2494 Mat B; 2495 PetscScalar *values; 2496 2497 PetscFunctionBegin; 2498 2499 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2500 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2501 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2502 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2503 for (i=0;i<m;i++) { 2504 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2505 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2506 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2507 } 2508 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2509 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2510 2511 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2512 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2513 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2514 sprintf(name,"%s.%d",outfile,rank); 2515 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2516 ierr = PetscFree(name); 2517 ierr = MatView(B,out);CHKERRQ(ierr); 2518 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2519 ierr = MatDestroy(B);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522