1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 19 a slightly higher hash table cost; without it it is not scalable (each processor 20 has an order N integer array but is fast to acess. 21 */ 22 #undef __FUNCT__ 23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 24 int CreateColmap_MPIAIJ_Private(Mat mat) 25 { 26 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 27 int n = aij->B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 37 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for (_i=low; _i<high; _i++) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 71 PetscScalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 77 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 78 new_j = (int*)(new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 83 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 145 PetscScalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 151 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 PetscScalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 PetscTruth roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 PetscScalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 212 PetscScalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 PetscScalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ*)B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 if (ignorezeroentries && value == 0.0) continue; 260 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 261 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 262 } 263 } 264 } else { 265 if (!aij->donotstash) { 266 if (roworiented) { 267 if (ignorezeroentries && v[i*n] == 0.0) continue; 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 if (ignorezeroentries && v[i] == 0.0) continue; 271 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 272 } 273 } 274 } 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatGetValues_MPIAIJ" 281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 282 { 283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 284 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 285 int cstart = aij->cstart,cend = aij->cend,row,col; 286 287 PetscFunctionBegin; 288 for (i=0; i<m; i++) { 289 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 290 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 291 if (idxm[i] >= rstart && idxm[i] < rend) { 292 row = idxm[i] - rstart; 293 for (j=0; j<n; j++) { 294 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 295 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 296 if (idxn[j] >= cstart && idxn[j] < cend){ 297 col = idxn[j] - cstart; 298 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 299 } else { 300 if (!aij->colmap) { 301 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 302 } 303 #if defined (PETSC_USE_CTABLE) 304 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 305 col --; 306 #else 307 col = aij->colmap[idxn[j]] - 1; 308 #endif 309 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 310 else { 311 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 312 } 313 } 314 } 315 } else { 316 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 325 { 326 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 327 int ierr,nstash,reallocs; 328 InsertMode addv; 329 330 PetscFunctionBegin; 331 if (aij->donotstash) { 332 PetscFunctionReturn(0); 333 } 334 335 /* make sure all processors are either in INSERTMODE or ADDMODE */ 336 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 337 if (addv == (ADD_VALUES|INSERT_VALUES)) { 338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 339 } 340 mat->insertmode = addv; /* in case this processor had no cache */ 341 342 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 343 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 344 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 345 PetscFunctionReturn(0); 346 } 347 348 349 #undef __FUNCT__ 350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 352 { 353 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. */ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 #if defined(PETSC_HAVE_SUPERLUDIST) 408 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 409 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 410 #endif 411 PetscFunctionReturn(0); 412 } 413 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 416 int MatZeroEntries_MPIAIJ(Mat A) 417 { 418 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 419 int ierr; 420 421 PetscFunctionBegin; 422 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 423 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatZeroRows_MPIAIJ" 429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 430 { 431 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 432 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 433 int *procs,*nprocs,j,idx,nsends,*work,row; 434 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 435 int *rvalues,tag = A->tag,count,base,slen,n,*source; 436 int *lens,imdex,*lrows,*values,rstart=l->rstart; 437 MPI_Comm comm = A->comm; 438 MPI_Request *send_waits,*recv_waits; 439 MPI_Status recv_status,*send_status; 440 IS istmp; 441 PetscTruth found; 442 443 PetscFunctionBegin; 444 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 445 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 446 447 /* first count number of contributors to each processor */ 448 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 449 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 450 procs = nprocs + size; 451 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 452 for (i=0; i<N; i++) { 453 idx = rows[i]; 454 found = PETSC_FALSE; 455 for (j=0; j<size; j++) { 456 if (idx >= owners[j] && idx < owners[j+1]) { 457 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 458 } 459 } 460 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 461 } 462 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 463 464 /* inform other processors of number of messages and max length*/ 465 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 466 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 467 nrecvs = work[size+rank]; 468 nmax = work[rank]; 469 ierr = PetscFree(work);CHKERRQ(ierr); 470 471 /* post receives: */ 472 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 474 for (i=0; i<nrecvs; i++) { 475 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 476 } 477 478 /* do sends: 479 1) starts[i] gives the starting index in svalues for stuff going to 480 the ith processor 481 */ 482 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 484 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 485 starts[0] = 0; 486 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 487 for (i=0; i<N; i++) { 488 svalues[starts[owner[i]]++] = rows[i]; 489 } 490 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 491 492 starts[0] = 0; 493 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 494 count = 0; 495 for (i=0; i<size; i++) { 496 if (procs[i]) { 497 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 498 } 499 } 500 ierr = PetscFree(starts);CHKERRQ(ierr); 501 502 base = owners[rank]; 503 504 /* wait on receives */ 505 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 506 source = lens + nrecvs; 507 count = nrecvs; slen = 0; 508 while (count) { 509 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 510 /* unpack receives into our local space */ 511 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 512 source[imdex] = recv_status.MPI_SOURCE; 513 lens[imdex] = n; 514 slen += n; 515 count--; 516 } 517 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 518 519 /* move the data into the send scatter */ 520 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 521 count = 0; 522 for (i=0; i<nrecvs; i++) { 523 values = rvalues + i*nmax; 524 for (j=0; j<lens[i]; j++) { 525 lrows[count++] = values[j] - base; 526 } 527 } 528 ierr = PetscFree(rvalues);CHKERRQ(ierr); 529 ierr = PetscFree(lens);CHKERRQ(ierr); 530 ierr = PetscFree(owner);CHKERRQ(ierr); 531 ierr = PetscFree(nprocs);CHKERRQ(ierr); 532 533 /* actually zap the local rows */ 534 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 535 PetscLogObjectParent(A,istmp); 536 537 /* 538 Zero the required rows. If the "diagonal block" of the matrix 539 is square and the user wishes to set the diagonal we use seperate 540 code so that MatSetValues() is not called for each diagonal allocating 541 new memory, thus calling lots of mallocs and slowing things down. 542 543 Contributed by: Mathew Knepley 544 */ 545 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 546 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 547 if (diag && (l->A->M == l->A->N)) { 548 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 549 } else if (diag) { 550 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 551 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 552 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 554 } 555 for (i = 0; i < slen; i++) { 556 row = lrows[i] + rstart; 557 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 558 } 559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 561 } else { 562 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 563 } 564 ierr = ISDestroy(istmp);CHKERRQ(ierr); 565 ierr = PetscFree(lrows);CHKERRQ(ierr); 566 567 /* wait on sends */ 568 if (nsends) { 569 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 570 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 571 ierr = PetscFree(send_status);CHKERRQ(ierr); 572 } 573 ierr = PetscFree(send_waits);CHKERRQ(ierr); 574 ierr = PetscFree(svalues);CHKERRQ(ierr); 575 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMult_MPIAIJ" 581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 582 { 583 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 584 int ierr,nt; 585 586 PetscFunctionBegin; 587 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 588 if (nt != A->n) { 589 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 590 } 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultAdd_MPIAIJ" 600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 607 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 608 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 609 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 616 { 617 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 618 int ierr; 619 620 PetscFunctionBegin; 621 /* do nondiagonal part */ 622 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 623 /* send it on its way */ 624 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 625 /* do local part */ 626 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 627 /* receive remote parts: note this assumes the values are not actually */ 628 /* inserted in yy until the next line, which is true for my implementation*/ 629 /* but is not perhaps always true. */ 630 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 637 { 638 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 639 int ierr; 640 641 PetscFunctionBegin; 642 /* do nondiagonal part */ 643 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 644 /* send it on its way */ 645 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 646 /* do local part */ 647 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 648 /* receive remote parts: note this assumes the values are not actually */ 649 /* inserted in yy until the next line, which is true for my implementation*/ 650 /* but is not perhaps always true. */ 651 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 /* 656 This only works correctly for square matrices where the subblock A->A is the 657 diagonal block 658 */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 662 { 663 int ierr; 664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 665 666 PetscFunctionBegin; 667 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 668 if (a->rstart != a->cstart || a->rend != a->cend) { 669 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 670 } 671 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatScale_MPIAIJ" 677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 678 { 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 int ierr; 681 682 PetscFunctionBegin; 683 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 684 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatDestroy_MPIAIJ" 690 int MatDestroy_MPIAIJ(Mat mat) 691 { 692 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 693 int ierr; 694 695 PetscFunctionBegin; 696 #if defined(PETSC_USE_LOG) 697 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 698 #endif 699 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 700 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 701 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 702 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 703 #if defined (PETSC_USE_CTABLE) 704 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 705 #else 706 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 707 #endif 708 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 709 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 710 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 711 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 712 ierr = PetscFree(aij);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 720 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 721 { 722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 723 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 724 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 725 PetscTruth isdraw,isascii,flg; 726 PetscViewer sviewer; 727 PetscViewerFormat format; 728 729 PetscFunctionBegin; 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 732 if (isascii) { 733 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 734 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 735 MatInfo info; 736 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 737 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 738 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 739 if (flg) { 740 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 741 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 742 } else { 743 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 744 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 745 } 746 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 748 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 749 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 750 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 751 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 752 PetscFunctionReturn(0); 753 } else if (format == PETSC_VIEWER_ASCII_INFO) { 754 PetscFunctionReturn(0); 755 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 756 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 757 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 758 #endif 759 PetscFunctionReturn(0); 760 } 761 } else if (isdraw) { 762 PetscDraw draw; 763 PetscTruth isnull; 764 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 765 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 766 } 767 768 if (size == 1) { 769 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 770 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 771 } else { 772 /* assemble the entire matrix onto first processor. */ 773 Mat A; 774 Mat_SeqAIJ *Aloc; 775 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 776 PetscScalar *a; 777 778 if (!rank) { 779 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 780 } else { 781 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 782 } 783 PetscLogObjectParent(mat,A); 784 785 /* copy over the A part */ 786 Aloc = (Mat_SeqAIJ*)aij->A->data; 787 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 788 row = aij->rstart; 789 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 790 for (i=0; i<m; i++) { 791 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 792 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 793 } 794 aj = Aloc->j; 795 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 796 797 /* copy over the B part */ 798 Aloc = (Mat_SeqAIJ*)aij->B->data; 799 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 800 row = aij->rstart; 801 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 802 ct = cols; 803 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 804 for (i=0; i<m; i++) { 805 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 806 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 807 } 808 ierr = PetscFree(ct);CHKERRQ(ierr); 809 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 810 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 811 /* 812 Everyone has to call to draw the matrix since the graphics waits are 813 synchronized across all processors that share the PetscDraw object 814 */ 815 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 816 if (!rank) { 817 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 818 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 819 } 820 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 821 ierr = MatDestroy(A);CHKERRQ(ierr); 822 } 823 PetscFunctionReturn(0); 824 } 825 826 #undef __FUNCT__ 827 #define __FUNCT__ "MatView_MPIAIJ" 828 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 829 { 830 int ierr; 831 PetscTruth isascii,isdraw,issocket,isbinary; 832 833 PetscFunctionBegin; 834 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 835 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 836 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 837 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 838 if (isascii || isdraw || isbinary || issocket) { 839 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 840 } else { 841 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 842 } 843 PetscFunctionReturn(0); 844 } 845 846 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatRelax_MPIAIJ" 850 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 851 { 852 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 853 int ierr; 854 Vec bb1; 855 PetscScalar mone=-1.0; 856 857 PetscFunctionBegin; 858 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 859 860 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 861 862 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 863 if (flag & SOR_ZERO_INITIAL_GUESS) { 864 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 865 its--; 866 } 867 868 while (its--) { 869 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 870 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 871 872 /* update rhs: bb1 = bb - B*x */ 873 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 874 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 875 876 /* local sweep */ 877 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 878 CHKERRQ(ierr); 879 } 880 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 881 if (flag & SOR_ZERO_INITIAL_GUESS) { 882 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 883 its--; 884 } 885 while (its--) { 886 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 887 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 888 889 /* update rhs: bb1 = bb - B*x */ 890 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 891 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 892 893 /* local sweep */ 894 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 895 CHKERRQ(ierr); 896 } 897 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 898 if (flag & SOR_ZERO_INITIAL_GUESS) { 899 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 900 its--; 901 } 902 while (its--) { 903 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 904 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 905 906 /* update rhs: bb1 = bb - B*x */ 907 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 908 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 909 910 /* local sweep */ 911 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 912 CHKERRQ(ierr); 913 } 914 } else { 915 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 916 } 917 918 ierr = VecDestroy(bb1);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatGetInfo_MPIAIJ" 924 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 925 { 926 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 927 Mat A = mat->A,B = mat->B; 928 int ierr; 929 PetscReal isend[5],irecv[5]; 930 931 PetscFunctionBegin; 932 info->block_size = 1.0; 933 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 934 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 935 isend[3] = info->memory; isend[4] = info->mallocs; 936 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 937 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 938 isend[3] += info->memory; isend[4] += info->mallocs; 939 if (flag == MAT_LOCAL) { 940 info->nz_used = isend[0]; 941 info->nz_allocated = isend[1]; 942 info->nz_unneeded = isend[2]; 943 info->memory = isend[3]; 944 info->mallocs = isend[4]; 945 } else if (flag == MAT_GLOBAL_MAX) { 946 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 947 info->nz_used = irecv[0]; 948 info->nz_allocated = irecv[1]; 949 info->nz_unneeded = irecv[2]; 950 info->memory = irecv[3]; 951 info->mallocs = irecv[4]; 952 } else if (flag == MAT_GLOBAL_SUM) { 953 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 954 info->nz_used = irecv[0]; 955 info->nz_allocated = irecv[1]; 956 info->nz_unneeded = irecv[2]; 957 info->memory = irecv[3]; 958 info->mallocs = irecv[4]; 959 } 960 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 961 info->fill_ratio_needed = 0; 962 info->factor_mallocs = 0; 963 info->rows_global = (double)matin->M; 964 info->columns_global = (double)matin->N; 965 info->rows_local = (double)matin->m; 966 info->columns_local = (double)matin->N; 967 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatSetOption_MPIAIJ" 973 int MatSetOption_MPIAIJ(Mat A,MatOption op) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 976 int ierr; 977 978 PetscFunctionBegin; 979 switch (op) { 980 case MAT_NO_NEW_NONZERO_LOCATIONS: 981 case MAT_YES_NEW_NONZERO_LOCATIONS: 982 case MAT_COLUMNS_UNSORTED: 983 case MAT_COLUMNS_SORTED: 984 case MAT_NEW_NONZERO_ALLOCATION_ERR: 985 case MAT_KEEP_ZEROED_ROWS: 986 case MAT_NEW_NONZERO_LOCATION_ERR: 987 case MAT_USE_INODES: 988 case MAT_DO_NOT_USE_INODES: 989 case MAT_IGNORE_ZERO_ENTRIES: 990 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 991 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 992 break; 993 case MAT_ROW_ORIENTED: 994 a->roworiented = PETSC_TRUE; 995 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 996 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 997 break; 998 case MAT_ROWS_SORTED: 999 case MAT_ROWS_UNSORTED: 1000 case MAT_YES_NEW_DIAGONALS: 1001 case MAT_USE_SINGLE_PRECISION_SOLVES: 1002 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1003 break; 1004 case MAT_COLUMN_ORIENTED: 1005 a->roworiented = PETSC_FALSE; 1006 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1007 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1008 break; 1009 case MAT_IGNORE_OFF_PROC_ENTRIES: 1010 a->donotstash = PETSC_TRUE; 1011 break; 1012 case MAT_NO_NEW_DIAGONALS: 1013 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1014 default: 1015 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1016 } 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatGetRow_MPIAIJ" 1022 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1023 { 1024 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1025 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1026 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1027 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1028 int *cmap,*idx_p; 1029 1030 PetscFunctionBegin; 1031 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1032 mat->getrowactive = PETSC_TRUE; 1033 1034 if (!mat->rowvalues && (idx || v)) { 1035 /* 1036 allocate enough space to hold information from the longest row. 1037 */ 1038 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1039 int max = 1,tmp; 1040 for (i=0; i<matin->m; i++) { 1041 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1042 if (max < tmp) { max = tmp; } 1043 } 1044 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1045 mat->rowindices = (int*)(mat->rowvalues + max); 1046 } 1047 1048 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1049 lrow = row - rstart; 1050 1051 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1052 if (!v) {pvA = 0; pvB = 0;} 1053 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1054 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1056 nztot = nzA + nzB; 1057 1058 cmap = mat->garray; 1059 if (v || idx) { 1060 if (nztot) { 1061 /* Sort by increasing column numbers, assuming A and B already sorted */ 1062 int imark = -1; 1063 if (v) { 1064 *v = v_p = mat->rowvalues; 1065 for (i=0; i<nzB; i++) { 1066 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1067 else break; 1068 } 1069 imark = i; 1070 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1071 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1072 } 1073 if (idx) { 1074 *idx = idx_p = mat->rowindices; 1075 if (imark > -1) { 1076 for (i=0; i<imark; i++) { 1077 idx_p[i] = cmap[cworkB[i]]; 1078 } 1079 } else { 1080 for (i=0; i<nzB; i++) { 1081 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1082 else break; 1083 } 1084 imark = i; 1085 } 1086 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1087 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1088 } 1089 } else { 1090 if (idx) *idx = 0; 1091 if (v) *v = 0; 1092 } 1093 } 1094 *nz = nztot; 1095 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1096 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1102 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1103 { 1104 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1105 1106 PetscFunctionBegin; 1107 if (aij->getrowactive == PETSC_FALSE) { 1108 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1109 } 1110 aij->getrowactive = PETSC_FALSE; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatNorm_MPIAIJ" 1116 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1117 { 1118 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1119 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1120 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1121 PetscReal sum = 0.0; 1122 PetscScalar *v; 1123 1124 PetscFunctionBegin; 1125 if (aij->size == 1) { 1126 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1127 } else { 1128 if (type == NORM_FROBENIUS) { 1129 v = amat->a; 1130 for (i=0; i<amat->nz; i++) { 1131 #if defined(PETSC_USE_COMPLEX) 1132 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1133 #else 1134 sum += (*v)*(*v); v++; 1135 #endif 1136 } 1137 v = bmat->a; 1138 for (i=0; i<bmat->nz; i++) { 1139 #if defined(PETSC_USE_COMPLEX) 1140 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1141 #else 1142 sum += (*v)*(*v); v++; 1143 #endif 1144 } 1145 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1146 *norm = sqrt(*norm); 1147 } else if (type == NORM_1) { /* max column norm */ 1148 PetscReal *tmp,*tmp2; 1149 int *jj,*garray = aij->garray; 1150 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1151 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1152 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1153 *norm = 0.0; 1154 v = amat->a; jj = amat->j; 1155 for (j=0; j<amat->nz; j++) { 1156 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1157 } 1158 v = bmat->a; jj = bmat->j; 1159 for (j=0; j<bmat->nz; j++) { 1160 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1161 } 1162 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1163 for (j=0; j<mat->N; j++) { 1164 if (tmp2[j] > *norm) *norm = tmp2[j]; 1165 } 1166 ierr = PetscFree(tmp);CHKERRQ(ierr); 1167 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1168 } else if (type == NORM_INFINITY) { /* max row norm */ 1169 PetscReal ntemp = 0.0; 1170 for (j=0; j<aij->A->m; j++) { 1171 v = amat->a + amat->i[j] + shift; 1172 sum = 0.0; 1173 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1174 sum += PetscAbsScalar(*v); v++; 1175 } 1176 v = bmat->a + bmat->i[j] + shift; 1177 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1178 sum += PetscAbsScalar(*v); v++; 1179 } 1180 if (sum > ntemp) ntemp = sum; 1181 } 1182 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1183 } else { 1184 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1185 } 1186 } 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatTranspose_MPIAIJ" 1192 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1193 { 1194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1195 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1196 int ierr,shift = Aloc->indexshift; 1197 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1198 Mat B; 1199 PetscScalar *array; 1200 1201 PetscFunctionBegin; 1202 if (!matout && M != N) { 1203 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1204 } 1205 1206 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1207 1208 /* copy over the A part */ 1209 Aloc = (Mat_SeqAIJ*)a->A->data; 1210 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1211 row = a->rstart; 1212 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1213 for (i=0; i<m; i++) { 1214 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1215 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1216 } 1217 aj = Aloc->j; 1218 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1219 1220 /* copy over the B part */ 1221 Aloc = (Mat_SeqAIJ*)a->B->data; 1222 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1223 row = a->rstart; 1224 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1225 ct = cols; 1226 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1227 for (i=0; i<m; i++) { 1228 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1229 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1230 } 1231 ierr = PetscFree(ct);CHKERRQ(ierr); 1232 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1233 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1234 if (matout) { 1235 *matout = B; 1236 } else { 1237 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1244 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1245 { 1246 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1247 Mat a = aij->A,b = aij->B; 1248 int ierr,s1,s2,s3; 1249 1250 PetscFunctionBegin; 1251 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1252 if (rr) { 1253 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1254 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1255 /* Overlap communication with computation. */ 1256 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1257 } 1258 if (ll) { 1259 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1260 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1261 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1262 } 1263 /* scale the diagonal block */ 1264 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1265 1266 if (rr) { 1267 /* Do a scatter end and then right scale the off-diagonal block */ 1268 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1269 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1270 } 1271 1272 PetscFunctionReturn(0); 1273 } 1274 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1278 int MatPrintHelp_MPIAIJ(Mat A) 1279 { 1280 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1281 int ierr; 1282 1283 PetscFunctionBegin; 1284 if (!a->rank) { 1285 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1286 } 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1292 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1293 { 1294 PetscFunctionBegin; 1295 *bs = 1; 1296 PetscFunctionReturn(0); 1297 } 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1300 int MatSetUnfactored_MPIAIJ(Mat A) 1301 { 1302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1303 int ierr; 1304 1305 PetscFunctionBegin; 1306 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatEqual_MPIAIJ" 1312 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1313 { 1314 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1315 Mat a,b,c,d; 1316 PetscTruth flg; 1317 int ierr; 1318 1319 PetscFunctionBegin; 1320 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1321 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1322 a = matA->A; b = matA->B; 1323 c = matB->A; d = matB->B; 1324 1325 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1326 if (flg == PETSC_TRUE) { 1327 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1328 } 1329 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatCopy_MPIAIJ" 1335 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1336 { 1337 int ierr; 1338 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1339 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1340 PetscTruth flg; 1341 1342 PetscFunctionBegin; 1343 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1344 if (str != SAME_NONZERO_PATTERN || !flg) { 1345 /* because of the column compression in the off-processor part of the matrix a->B, 1346 the number of columns in a->B and b->B may be different, hence we cannot call 1347 the MatCopy() directly on the two parts. If need be, we can provide a more 1348 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1349 then copying the submatrices */ 1350 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1351 } else { 1352 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1353 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1354 } 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1360 int MatSetUpPreallocation_MPIAIJ(Mat A) 1361 { 1362 int ierr; 1363 1364 PetscFunctionBegin; 1365 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1370 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1371 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1372 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1373 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1374 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1375 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1376 #endif 1377 1378 #include "petscblaslapack.h" 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatAXPY_MPIAIJ" 1382 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1383 { 1384 int ierr,one; 1385 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1386 Mat_SeqAIJ *x,*y; 1387 1388 PetscFunctionBegin; 1389 if (str == SAME_NONZERO_PATTERN) { 1390 x = (Mat_SeqAIJ *)xx->A->data; 1391 y = (Mat_SeqAIJ *)yy->A->data; 1392 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1393 x = (Mat_SeqAIJ *)xx->B->data; 1394 y = (Mat_SeqAIJ *)yy->B->data; 1395 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1396 } else { 1397 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1398 } 1399 PetscFunctionReturn(0); 1400 } 1401 1402 /* -------------------------------------------------------------------*/ 1403 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1404 MatGetRow_MPIAIJ, 1405 MatRestoreRow_MPIAIJ, 1406 MatMult_MPIAIJ, 1407 MatMultAdd_MPIAIJ, 1408 MatMultTranspose_MPIAIJ, 1409 MatMultTransposeAdd_MPIAIJ, 1410 0, 1411 0, 1412 0, 1413 0, 1414 0, 1415 0, 1416 MatRelax_MPIAIJ, 1417 MatTranspose_MPIAIJ, 1418 MatGetInfo_MPIAIJ, 1419 MatEqual_MPIAIJ, 1420 MatGetDiagonal_MPIAIJ, 1421 MatDiagonalScale_MPIAIJ, 1422 MatNorm_MPIAIJ, 1423 MatAssemblyBegin_MPIAIJ, 1424 MatAssemblyEnd_MPIAIJ, 1425 0, 1426 MatSetOption_MPIAIJ, 1427 MatZeroEntries_MPIAIJ, 1428 MatZeroRows_MPIAIJ, 1429 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1430 MatLUFactorSymbolic_MPIAIJ_TFS, 1431 #else 1432 0, 1433 #endif 1434 0, 1435 0, 1436 0, 1437 MatSetUpPreallocation_MPIAIJ, 1438 0, 1439 0, 1440 0, 1441 0, 1442 MatDuplicate_MPIAIJ, 1443 0, 1444 0, 1445 0, 1446 0, 1447 MatAXPY_MPIAIJ, 1448 MatGetSubMatrices_MPIAIJ, 1449 MatIncreaseOverlap_MPIAIJ, 1450 MatGetValues_MPIAIJ, 1451 MatCopy_MPIAIJ, 1452 MatPrintHelp_MPIAIJ, 1453 MatScale_MPIAIJ, 1454 0, 1455 0, 1456 0, 1457 MatGetBlockSize_MPIAIJ, 1458 0, 1459 0, 1460 0, 1461 0, 1462 MatFDColoringCreate_MPIAIJ, 1463 0, 1464 MatSetUnfactored_MPIAIJ, 1465 0, 1466 0, 1467 MatGetSubMatrix_MPIAIJ, 1468 MatDestroy_MPIAIJ, 1469 MatView_MPIAIJ, 1470 MatGetPetscMaps_Petsc, 1471 0, 1472 0, 1473 0, 1474 0, 1475 0, 1476 0, 1477 0, 1478 0, 1479 MatSetColoring_MPIAIJ, 1480 MatSetValuesAdic_MPIAIJ, 1481 MatSetValuesAdifor_MPIAIJ 1482 }; 1483 1484 /* ----------------------------------------------------------------------------------------*/ 1485 1486 EXTERN_C_BEGIN 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1489 int MatStoreValues_MPIAIJ(Mat mat) 1490 { 1491 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1492 int ierr; 1493 1494 PetscFunctionBegin; 1495 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1496 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 EXTERN_C_END 1500 1501 EXTERN_C_BEGIN 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1504 int MatRetrieveValues_MPIAIJ(Mat mat) 1505 { 1506 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1507 int ierr; 1508 1509 PetscFunctionBegin; 1510 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1511 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 EXTERN_C_END 1515 1516 #include "petscpc.h" 1517 EXTERN_C_BEGIN 1518 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1519 EXTERN_C_END 1520 1521 EXTERN_C_BEGIN 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatCreate_MPIAIJ" 1524 int MatCreate_MPIAIJ(Mat B) 1525 { 1526 Mat_MPIAIJ *b; 1527 int ierr,i,size; 1528 #if defined(PETSC_HAVE_SUPERLUDIST) 1529 PetscTruth flg; 1530 #endif 1531 1532 1533 PetscFunctionBegin; 1534 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1535 1536 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1537 B->data = (void*)b; 1538 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1539 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1540 B->factor = 0; 1541 B->assembled = PETSC_FALSE; 1542 B->mapping = 0; 1543 1544 B->insertmode = NOT_SET_VALUES; 1545 b->size = size; 1546 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1547 1548 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1549 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1550 1551 /* the information in the maps duplicates the information computed below, eventually 1552 we should remove the duplicate information that is not contained in the maps */ 1553 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1554 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1555 1556 /* build local table of row and column ownerships */ 1557 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1558 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1559 b->cowners = b->rowners + b->size + 2; 1560 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1561 b->rowners[0] = 0; 1562 for (i=2; i<=b->size; i++) { 1563 b->rowners[i] += b->rowners[i-1]; 1564 } 1565 b->rstart = b->rowners[b->rank]; 1566 b->rend = b->rowners[b->rank+1]; 1567 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1568 b->cowners[0] = 0; 1569 for (i=2; i<=b->size; i++) { 1570 b->cowners[i] += b->cowners[i-1]; 1571 } 1572 b->cstart = b->cowners[b->rank]; 1573 b->cend = b->cowners[b->rank+1]; 1574 1575 /* build cache for off array entries formed */ 1576 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1577 b->donotstash = PETSC_FALSE; 1578 b->colmap = 0; 1579 b->garray = 0; 1580 b->roworiented = PETSC_TRUE; 1581 1582 /* stuff used for matrix vector multiply */ 1583 b->lvec = PETSC_NULL; 1584 b->Mvctx = PETSC_NULL; 1585 1586 /* stuff for MatGetRow() */ 1587 b->rowindices = 0; 1588 b->rowvalues = 0; 1589 b->getrowactive = PETSC_FALSE; 1590 1591 #if defined(PETSC_HAVE_SUPERLUDIST) 1592 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr); 1593 if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); } 1594 #endif 1595 1596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1597 "MatStoreValues_MPIAIJ", 1598 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1600 "MatRetrieveValues_MPIAIJ", 1601 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1603 "MatGetDiagonalBlock_MPIAIJ", 1604 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1605 1606 PetscFunctionReturn(0); 1607 } 1608 EXTERN_C_END 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1612 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1613 { 1614 Mat mat; 1615 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1616 int ierr; 1617 1618 PetscFunctionBegin; 1619 *newmat = 0; 1620 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1621 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1622 a = (Mat_MPIAIJ*)mat->data; 1623 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1624 mat->factor = matin->factor; 1625 mat->assembled = PETSC_TRUE; 1626 mat->insertmode = NOT_SET_VALUES; 1627 mat->preallocated = PETSC_TRUE; 1628 1629 a->rstart = oldmat->rstart; 1630 a->rend = oldmat->rend; 1631 a->cstart = oldmat->cstart; 1632 a->cend = oldmat->cend; 1633 a->size = oldmat->size; 1634 a->rank = oldmat->rank; 1635 a->donotstash = oldmat->donotstash; 1636 a->roworiented = oldmat->roworiented; 1637 a->rowindices = 0; 1638 a->rowvalues = 0; 1639 a->getrowactive = PETSC_FALSE; 1640 1641 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1642 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1643 if (oldmat->colmap) { 1644 #if defined (PETSC_USE_CTABLE) 1645 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1646 #else 1647 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1648 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1649 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1650 #endif 1651 } else a->colmap = 0; 1652 if (oldmat->garray) { 1653 int len; 1654 len = oldmat->B->n; 1655 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1656 PetscLogObjectMemory(mat,len*sizeof(int)); 1657 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1658 } else a->garray = 0; 1659 1660 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1661 PetscLogObjectParent(mat,a->lvec); 1662 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1663 PetscLogObjectParent(mat,a->Mvctx); 1664 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1665 PetscLogObjectParent(mat,a->A); 1666 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1667 PetscLogObjectParent(mat,a->B); 1668 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1669 *newmat = mat; 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #include "petscsys.h" 1674 1675 EXTERN_C_BEGIN 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatLoad_MPIAIJ" 1678 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1679 { 1680 Mat A; 1681 PetscScalar *vals,*svals; 1682 MPI_Comm comm = ((PetscObject)viewer)->comm; 1683 MPI_Status status; 1684 int i,nz,ierr,j,rstart,rend,fd; 1685 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1686 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1687 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1688 1689 PetscFunctionBegin; 1690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1691 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1692 if (!rank) { 1693 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1694 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1695 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1696 if (header[3] < 0) { 1697 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1698 } 1699 } 1700 1701 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1702 M = header[1]; N = header[2]; 1703 /* determine ownership of all rows */ 1704 m = M/size + ((M % size) > rank); 1705 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1706 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1707 rowners[0] = 0; 1708 for (i=2; i<=size; i++) { 1709 rowners[i] += rowners[i-1]; 1710 } 1711 rstart = rowners[rank]; 1712 rend = rowners[rank+1]; 1713 1714 /* distribute row lengths to all processors */ 1715 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1716 offlens = ourlens + (rend-rstart); 1717 if (!rank) { 1718 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1719 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1720 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1721 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1722 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1723 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1724 } else { 1725 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1726 } 1727 1728 if (!rank) { 1729 /* calculate the number of nonzeros on each processor */ 1730 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1731 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1732 for (i=0; i<size; i++) { 1733 for (j=rowners[i]; j< rowners[i+1]; j++) { 1734 procsnz[i] += rowlengths[j]; 1735 } 1736 } 1737 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1738 1739 /* determine max buffer needed and allocate it */ 1740 maxnz = 0; 1741 for (i=0; i<size; i++) { 1742 maxnz = PetscMax(maxnz,procsnz[i]); 1743 } 1744 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1745 1746 /* read in my part of the matrix column indices */ 1747 nz = procsnz[0]; 1748 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1749 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1750 1751 /* read in every one elses and ship off */ 1752 for (i=1; i<size; i++) { 1753 nz = procsnz[i]; 1754 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1755 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1756 } 1757 ierr = PetscFree(cols);CHKERRQ(ierr); 1758 } else { 1759 /* determine buffer space needed for message */ 1760 nz = 0; 1761 for (i=0; i<m; i++) { 1762 nz += ourlens[i]; 1763 } 1764 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1765 1766 /* receive message of column indices*/ 1767 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1768 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1769 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1770 } 1771 1772 /* determine column ownership if matrix is not square */ 1773 if (N != M) { 1774 n = N/size + ((N % size) > rank); 1775 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1776 cstart = cend - n; 1777 } else { 1778 cstart = rstart; 1779 cend = rend; 1780 n = cend - cstart; 1781 } 1782 1783 /* loop over local rows, determining number of off diagonal entries */ 1784 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1785 jj = 0; 1786 for (i=0; i<m; i++) { 1787 for (j=0; j<ourlens[i]; j++) { 1788 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1789 jj++; 1790 } 1791 } 1792 1793 /* create our matrix */ 1794 for (i=0; i<m; i++) { 1795 ourlens[i] -= offlens[i]; 1796 } 1797 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1798 A = *newmat; 1799 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1800 for (i=0; i<m; i++) { 1801 ourlens[i] += offlens[i]; 1802 } 1803 1804 if (!rank) { 1805 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1806 1807 /* read in my part of the matrix numerical values */ 1808 nz = procsnz[0]; 1809 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1810 1811 /* insert into matrix */ 1812 jj = rstart; 1813 smycols = mycols; 1814 svals = vals; 1815 for (i=0; i<m; i++) { 1816 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1817 smycols += ourlens[i]; 1818 svals += ourlens[i]; 1819 jj++; 1820 } 1821 1822 /* read in other processors and ship out */ 1823 for (i=1; i<size; i++) { 1824 nz = procsnz[i]; 1825 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1826 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1827 } 1828 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1829 } else { 1830 /* receive numeric values */ 1831 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1832 1833 /* receive message of values*/ 1834 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1835 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1836 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1837 1838 /* insert into matrix */ 1839 jj = rstart; 1840 smycols = mycols; 1841 svals = vals; 1842 for (i=0; i<m; i++) { 1843 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1844 smycols += ourlens[i]; 1845 svals += ourlens[i]; 1846 jj++; 1847 } 1848 } 1849 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1850 ierr = PetscFree(vals);CHKERRQ(ierr); 1851 ierr = PetscFree(mycols);CHKERRQ(ierr); 1852 ierr = PetscFree(rowners);CHKERRQ(ierr); 1853 1854 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1855 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 EXTERN_C_END 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1862 /* 1863 Not great since it makes two copies of the submatrix, first an SeqAIJ 1864 in local and then by concatenating the local matrices the end result. 1865 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1866 */ 1867 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1868 { 1869 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1870 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1871 Mat *local,M,Mreuse; 1872 PetscScalar *vwork,*aa; 1873 MPI_Comm comm = mat->comm; 1874 Mat_SeqAIJ *aij; 1875 1876 1877 PetscFunctionBegin; 1878 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1879 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1880 1881 if (call == MAT_REUSE_MATRIX) { 1882 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1883 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1884 local = &Mreuse; 1885 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1886 } else { 1887 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1888 Mreuse = *local; 1889 ierr = PetscFree(local);CHKERRQ(ierr); 1890 } 1891 1892 /* 1893 m - number of local rows 1894 n - number of columns (same on all processors) 1895 rstart - first row in new global matrix generated 1896 */ 1897 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1898 if (call == MAT_INITIAL_MATRIX) { 1899 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1900 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1901 ii = aij->i; 1902 jj = aij->j; 1903 1904 /* 1905 Determine the number of non-zeros in the diagonal and off-diagonal 1906 portions of the matrix in order to do correct preallocation 1907 */ 1908 1909 /* first get start and end of "diagonal" columns */ 1910 if (csize == PETSC_DECIDE) { 1911 nlocal = n/size + ((n % size) > rank); 1912 } else { 1913 nlocal = csize; 1914 } 1915 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1916 rstart = rend - nlocal; 1917 if (rank == size - 1 && rend != n) { 1918 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1919 } 1920 1921 /* next, compute all the lengths */ 1922 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1923 olens = dlens + m; 1924 for (i=0; i<m; i++) { 1925 jend = ii[i+1] - ii[i]; 1926 olen = 0; 1927 dlen = 0; 1928 for (j=0; j<jend; j++) { 1929 if (*jj < rstart || *jj >= rend) olen++; 1930 else dlen++; 1931 jj++; 1932 } 1933 olens[i] = olen; 1934 dlens[i] = dlen; 1935 } 1936 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1937 ierr = PetscFree(dlens);CHKERRQ(ierr); 1938 } else { 1939 int ml,nl; 1940 1941 M = *newmat; 1942 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1943 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1944 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1945 /* 1946 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1947 rather than the slower MatSetValues(). 1948 */ 1949 M->was_assembled = PETSC_TRUE; 1950 M->assembled = PETSC_FALSE; 1951 } 1952 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1953 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1954 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1955 ii = aij->i; 1956 jj = aij->j; 1957 aa = aij->a; 1958 for (i=0; i<m; i++) { 1959 row = rstart + i; 1960 nz = ii[i+1] - ii[i]; 1961 cwork = jj; jj += nz; 1962 vwork = aa; aa += nz; 1963 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1964 } 1965 1966 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 *newmat = M; 1969 1970 /* save submatrix used in processor for next request */ 1971 if (call == MAT_INITIAL_MATRIX) { 1972 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1973 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1974 } 1975 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatMPIAIJSetPreallocation" 1981 /*@C 1982 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 1983 (the default parallel PETSc format). For good matrix assembly performance 1984 the user should preallocate the matrix storage by setting the parameters 1985 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1986 performance can be increased by more than a factor of 50. 1987 1988 Collective on MPI_Comm 1989 1990 Input Parameters: 1991 + A - the matrix 1992 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1993 (same value is used for all local rows) 1994 . d_nnz - array containing the number of nonzeros in the various rows of the 1995 DIAGONAL portion of the local submatrix (possibly different for each row) 1996 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1997 The size of this array is equal to the number of local rows, i.e 'm'. 1998 You must leave room for the diagonal entry even if it is zero. 1999 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2000 submatrix (same value is used for all local rows). 2001 - o_nnz - array containing the number of nonzeros in the various rows of the 2002 OFF-DIAGONAL portion of the local submatrix (possibly different for 2003 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2004 structure. The size of this array is equal to the number 2005 of local rows, i.e 'm'. 2006 2007 The AIJ format (also called the Yale sparse matrix format or 2008 compressed row storage), is fully compatible with standard Fortran 77 2009 storage. That is, the stored row and column indices can begin at 2010 either one (as in Fortran) or zero. See the users manual for details. 2011 2012 The user MUST specify either the local or global matrix dimensions 2013 (possibly both). 2014 2015 The parallel matrix is partitioned such that the first m0 rows belong to 2016 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2017 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2018 2019 The DIAGONAL portion of the local submatrix of a processor can be defined 2020 as the submatrix which is obtained by extraction the part corresponding 2021 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2022 first row that belongs to the processor, and r2 is the last row belonging 2023 to the this processor. This is a square mxm matrix. The remaining portion 2024 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2025 2026 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2027 2028 By default, this format uses inodes (identical nodes) when possible. 2029 We search for consecutive rows with the same nonzero structure, thereby 2030 reusing matrix information to achieve increased efficiency. 2031 2032 Options Database Keys: 2033 + -mat_aij_no_inode - Do not use inodes 2034 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2035 - -mat_aij_oneindex - Internally use indexing starting at 1 2036 rather than 0. Note that when calling MatSetValues(), 2037 the user still MUST index entries starting at 0! 2038 2039 Example usage: 2040 2041 Consider the following 8x8 matrix with 34 non-zero values, that is 2042 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2043 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2044 as follows: 2045 2046 .vb 2047 1 2 0 | 0 3 0 | 0 4 2048 Proc0 0 5 6 | 7 0 0 | 8 0 2049 9 0 10 | 11 0 0 | 12 0 2050 ------------------------------------- 2051 13 0 14 | 15 16 17 | 0 0 2052 Proc1 0 18 0 | 19 20 21 | 0 0 2053 0 0 0 | 22 23 0 | 24 0 2054 ------------------------------------- 2055 Proc2 25 26 27 | 0 0 28 | 29 0 2056 30 0 0 | 31 32 33 | 0 34 2057 .ve 2058 2059 This can be represented as a collection of submatrices as: 2060 2061 .vb 2062 A B C 2063 D E F 2064 G H I 2065 .ve 2066 2067 Where the submatrices A,B,C are owned by proc0, D,E,F are 2068 owned by proc1, G,H,I are owned by proc2. 2069 2070 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2071 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2072 The 'M','N' parameters are 8,8, and have the same values on all procs. 2073 2074 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2075 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2076 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2077 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2078 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2079 matrix, ans [DF] as another SeqAIJ matrix. 2080 2081 When d_nz, o_nz parameters are specified, d_nz storage elements are 2082 allocated for every row of the local diagonal submatrix, and o_nz 2083 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2084 One way to choose d_nz and o_nz is to use the max nonzerors per local 2085 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2086 In this case, the values of d_nz,o_nz are: 2087 .vb 2088 proc0 : dnz = 2, o_nz = 2 2089 proc1 : dnz = 3, o_nz = 2 2090 proc2 : dnz = 1, o_nz = 4 2091 .ve 2092 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2093 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2094 for proc3. i.e we are using 12+15+10=37 storage locations to store 2095 34 values. 2096 2097 When d_nnz, o_nnz parameters are specified, the storage is specified 2098 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2099 In the above case the values for d_nnz,o_nnz are: 2100 .vb 2101 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2102 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2103 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2104 .ve 2105 Here the space allocated is sum of all the above values i.e 34, and 2106 hence pre-allocation is perfect. 2107 2108 Level: intermediate 2109 2110 .keywords: matrix, aij, compressed row, sparse, parallel 2111 2112 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2113 @*/ 2114 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2115 { 2116 Mat_MPIAIJ *b; 2117 int ierr,i; 2118 PetscTruth flg2; 2119 2120 PetscFunctionBegin; 2121 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2122 if (!flg2) PetscFunctionReturn(0); 2123 B->preallocated = PETSC_TRUE; 2124 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2125 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2126 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2127 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2128 if (d_nnz) { 2129 for (i=0; i<B->m; i++) { 2130 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2131 } 2132 } 2133 if (o_nnz) { 2134 for (i=0; i<B->m; i++) { 2135 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2136 } 2137 } 2138 b = (Mat_MPIAIJ*)B->data; 2139 2140 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2141 PetscLogObjectParent(B,b->A); 2142 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2143 PetscLogObjectParent(B,b->B); 2144 2145 PetscFunctionReturn(0); 2146 } 2147 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatCreateMPIAIJ" 2150 /*@C 2151 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2152 (the default parallel PETSc format). For good matrix assembly performance 2153 the user should preallocate the matrix storage by setting the parameters 2154 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2155 performance can be increased by more than a factor of 50. 2156 2157 Collective on MPI_Comm 2158 2159 Input Parameters: 2160 + comm - MPI communicator 2161 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2162 This value should be the same as the local size used in creating the 2163 y vector for the matrix-vector product y = Ax. 2164 . n - This value should be the same as the local size used in creating the 2165 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2166 calculated if N is given) For square matrices n is almost always m. 2167 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2168 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2169 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2170 (same value is used for all local rows) 2171 . d_nnz - array containing the number of nonzeros in the various rows of the 2172 DIAGONAL portion of the local submatrix (possibly different for each row) 2173 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2174 The size of this array is equal to the number of local rows, i.e 'm'. 2175 You must leave room for the diagonal entry even if it is zero. 2176 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2177 submatrix (same value is used for all local rows). 2178 - o_nnz - array containing the number of nonzeros in the various rows of the 2179 OFF-DIAGONAL portion of the local submatrix (possibly different for 2180 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2181 structure. The size of this array is equal to the number 2182 of local rows, i.e 'm'. 2183 2184 Output Parameter: 2185 . A - the matrix 2186 2187 Notes: 2188 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2189 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2190 storage requirements for this matrix. 2191 2192 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2193 processor than it must be used on all processors that share the object for 2194 that argument. 2195 2196 The AIJ format (also called the Yale sparse matrix format or 2197 compressed row storage), is fully compatible with standard Fortran 77 2198 storage. That is, the stored row and column indices can begin at 2199 either one (as in Fortran) or zero. See the users manual for details. 2200 2201 The user MUST specify either the local or global matrix dimensions 2202 (possibly both). 2203 2204 The parallel matrix is partitioned such that the first m0 rows belong to 2205 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2206 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2207 2208 The DIAGONAL portion of the local submatrix of a processor can be defined 2209 as the submatrix which is obtained by extraction the part corresponding 2210 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2211 first row that belongs to the processor, and r2 is the last row belonging 2212 to the this processor. This is a square mxm matrix. The remaining portion 2213 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2214 2215 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2216 2217 By default, this format uses inodes (identical nodes) when possible. 2218 We search for consecutive rows with the same nonzero structure, thereby 2219 reusing matrix information to achieve increased efficiency. 2220 2221 Options Database Keys: 2222 + -mat_aij_no_inode - Do not use inodes 2223 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2224 - -mat_aij_oneindex - Internally use indexing starting at 1 2225 rather than 0. Note that when calling MatSetValues(), 2226 the user still MUST index entries starting at 0! 2227 2228 2229 Example usage: 2230 2231 Consider the following 8x8 matrix with 34 non-zero values, that is 2232 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2233 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2234 as follows: 2235 2236 .vb 2237 1 2 0 | 0 3 0 | 0 4 2238 Proc0 0 5 6 | 7 0 0 | 8 0 2239 9 0 10 | 11 0 0 | 12 0 2240 ------------------------------------- 2241 13 0 14 | 15 16 17 | 0 0 2242 Proc1 0 18 0 | 19 20 21 | 0 0 2243 0 0 0 | 22 23 0 | 24 0 2244 ------------------------------------- 2245 Proc2 25 26 27 | 0 0 28 | 29 0 2246 30 0 0 | 31 32 33 | 0 34 2247 .ve 2248 2249 This can be represented as a collection of submatrices as: 2250 2251 .vb 2252 A B C 2253 D E F 2254 G H I 2255 .ve 2256 2257 Where the submatrices A,B,C are owned by proc0, D,E,F are 2258 owned by proc1, G,H,I are owned by proc2. 2259 2260 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2261 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2262 The 'M','N' parameters are 8,8, and have the same values on all procs. 2263 2264 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2265 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2266 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2267 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2268 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2269 matrix, ans [DF] as another SeqAIJ matrix. 2270 2271 When d_nz, o_nz parameters are specified, d_nz storage elements are 2272 allocated for every row of the local diagonal submatrix, and o_nz 2273 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2274 One way to choose d_nz and o_nz is to use the max nonzerors per local 2275 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2276 In this case, the values of d_nz,o_nz are: 2277 .vb 2278 proc0 : dnz = 2, o_nz = 2 2279 proc1 : dnz = 3, o_nz = 2 2280 proc2 : dnz = 1, o_nz = 4 2281 .ve 2282 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2283 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2284 for proc3. i.e we are using 12+15+10=37 storage locations to store 2285 34 values. 2286 2287 When d_nnz, o_nnz parameters are specified, the storage is specified 2288 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2289 In the above case the values for d_nnz,o_nnz are: 2290 .vb 2291 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2292 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2293 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2294 .ve 2295 Here the space allocated is sum of all the above values i.e 34, and 2296 hence pre-allocation is perfect. 2297 2298 Level: intermediate 2299 2300 .keywords: matrix, aij, compressed row, sparse, parallel 2301 2302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2303 @*/ 2304 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2305 { 2306 int ierr,size; 2307 2308 PetscFunctionBegin; 2309 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2310 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2311 if (size > 1) { 2312 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2313 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2314 } else { 2315 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2316 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2317 } 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2323 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2324 { 2325 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2326 PetscFunctionBegin; 2327 *Ad = a->A; 2328 *Ao = a->B; 2329 *colmap = a->garray; 2330 PetscFunctionReturn(0); 2331 } 2332 2333 #undef __FUNCT__ 2334 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2335 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2336 { 2337 int ierr; 2338 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2339 2340 PetscFunctionBegin; 2341 if (coloring->ctype == IS_COLORING_LOCAL) { 2342 int *allcolors,*colors,i; 2343 ISColoring ocoloring; 2344 2345 /* set coloring for diagonal portion */ 2346 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2347 2348 /* set coloring for off-diagonal portion */ 2349 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2350 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2351 for (i=0; i<a->B->n; i++) { 2352 colors[i] = allcolors[a->garray[i]]; 2353 } 2354 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2355 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2356 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2357 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2358 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2359 int *colors,i,*larray; 2360 ISColoring ocoloring; 2361 2362 /* set coloring for diagonal portion */ 2363 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2364 for (i=0; i<a->A->n; i++) { 2365 larray[i] = i + a->cstart; 2366 } 2367 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2368 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2369 for (i=0; i<a->A->n; i++) { 2370 colors[i] = coloring->colors[larray[i]]; 2371 } 2372 ierr = PetscFree(larray);CHKERRQ(ierr); 2373 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2374 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2375 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2376 2377 /* set coloring for off-diagonal portion */ 2378 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2379 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2380 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2381 for (i=0; i<a->B->n; i++) { 2382 colors[i] = coloring->colors[larray[i]]; 2383 } 2384 ierr = PetscFree(larray);CHKERRQ(ierr); 2385 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2386 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2387 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2388 } else { 2389 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2390 } 2391 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2397 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2398 { 2399 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2400 int ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2404 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2410 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2411 { 2412 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2413 int ierr; 2414 2415 PetscFunctionBegin; 2416 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2417 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420