1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 int n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 
61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + 
bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = 
PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 int i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 
row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode 
MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 int i,j,rstart = aij->rstart,rend = aij->rend; 278 int cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 int nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 int i,j,rstart,ncols,n,flg; 351 int *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. 
*/ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 } 389 390 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 391 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 395 396 if (aij->rowvalues) { 397 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 398 aij->rowvalues = 0; 399 } 400 401 /* used by MatAXPY() */ 402 a->xtoy = 0; b->xtoy = 0; 403 a->XtoY = 0; b->XtoY = 0; 404 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 PetscErrorCode ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatZeroRows_MPIAIJ" 423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 PetscErrorCode ierr; 427 int i,N,*rows,*owners = l->rowners,size = l->size; 428 int *nprocs,j,idx,nsends,row; 429 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 430 int *rvalues,tag = A->tag,count,base,slen,n,*source; 431 int *lens,imdex,*lrows,*values,rstart=l->rstart; 432 MPI_Comm comm = A->comm; 433 MPI_Request *send_waits,*recv_waits; 434 MPI_Status recv_status,*send_status; 435 IS istmp; 436 PetscTruth found; 437 438 PetscFunctionBegin; 439 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 441 442 
/* first count number of contributors to each processor */ 443 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 444 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 445 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 446 for (i=0; i<N; i++) { 447 idx = rows[i]; 448 found = PETSC_FALSE; 449 for (j=0; j<size; j++) { 450 if (idx >= owners[j] && idx < owners[j+1]) { 451 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 452 } 453 } 454 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 455 } 456 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 457 458 /* inform other processors of number of messages and max length*/ 459 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 460 461 /* post receives: */ 462 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 463 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 464 for (i=0; i<nrecvs; i++) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 474 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 475 starts[0] = 0; 476 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 477 for (i=0; i<N; i++) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 481 482 starts[0] = 0; 483 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 484 count = 0; 485 for (i=0; i<size; i++) { 486 if (nprocs[2*i+1]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = 
PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 511 count = 0; 512 for (i=0; i<nrecvs; i++) { 513 values = rvalues + i*nmax; 514 for (j=0; j<lens[i]; j++) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PetscLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for (i = 0; i < slen; i++) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatMult_MPIAIJ" 571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 574 PetscErrorCode ierr; 575 int nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != A->n) { 580 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMultAdd_MPIAIJ" 591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 PetscErrorCode ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. 
*/ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 629 { 630 MPI_Comm comm; 631 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 632 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 633 IS Me,Notme; 634 PetscErrorCode ierr; 635 int M,N,first,last,*notme,ntids,i; 636 637 PetscFunctionBegin; 638 639 /* Easy test: symmetric diagonal block */ 640 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 641 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 642 if (!*f) PetscFunctionReturn(0); 643 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 644 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 645 if (ntids==1) PetscFunctionReturn(0); 646 647 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 648 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 650 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 651 for (i=0; i<first; i++) notme[i] = i; 652 for (i=last; i<M; i++) notme[i-last+first] = i; 653 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 654 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 655 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 656 Aoff = Aoffs[0]; 657 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 658 Boff = Boffs[0]; 659 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 660 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 661 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 662 ierr = ISDestroy(Me);CHKERRQ(ierr); 663 ierr = ISDestroy(Notme);CHKERRQ(ierr); 664 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 669 #undef __FUNCT__ 670 #define __FUNCT__ 
"MatMultTransposeAdd_MPIAIJ" 671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 674 PetscErrorCode ierr; 675 676 PetscFunctionBegin; 677 /* do nondiagonal part */ 678 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 679 /* send it on its way */ 680 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 /* do local part */ 682 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 683 /* receive remote parts: note this assumes the values are not actually */ 684 /* inserted in yy until the next line, which is true for my implementation*/ 685 /* but is not perhaps always true. */ 686 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 This only works correctly for square matrices where the subblock A->A is the 692 diagonal block 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 697 { 698 PetscErrorCode ierr; 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 701 PetscFunctionBegin; 702 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 703 if (a->rstart != a->cstart || a->rend != a->cend) { 704 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 705 } 706 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatScale_MPIAIJ" 712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 719 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatDestroy_MPIAIJ" 725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 726 { 727 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 #if defined(PETSC_USE_LOG) 732 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 733 #endif 734 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 735 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 736 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 737 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 738 #if defined (PETSC_USE_CTABLE) 739 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 740 #else 741 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 742 #endif 743 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 744 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 745 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 746 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 747 ierr = PetscFree(aij);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatView_MPIAIJ_Binary" 753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 754 { 755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 756 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 757 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 758 PetscErrorCode ierr; 759 int nz,fd,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 760 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 761 PetscScalar *column_values; 762 763 PetscFunctionBegin; 764 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 765 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 766 nz = A->nz + B->nz; 767 if (!rank) { 768 header[0] = MAT_FILE_COOKIE; 769 header[1] = mat->M; 770 header[2] = mat->N; 771 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 772 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 773 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 774 /* get largest number of rows 
any processor has */ 775 rlen = mat->m; 776 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 777 for (i=1; i<size; i++) { 778 rlen = PetscMax(rlen,range[i+1] - range[i]); 779 } 780 } else { 781 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 782 rlen = mat->m; 783 } 784 785 /* load up the local row counts */ 786 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 787 for (i=0; i<mat->m; i++) { 788 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 789 } 790 791 /* store the row lengths to the file */ 792 if (!rank) { 793 MPI_Status status; 794 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 795 for (i=1; i<size; i++) { 796 rlen = range[i+1] - range[i]; 797 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 798 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 799 } 800 } else { 801 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 802 } 803 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 804 805 /* load up the local column indices */ 806 nzmax = nz; /* )th processor needs space a largest processor needs */ 807 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 808 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 809 cnt = 0; 810 for (i=0; i<mat->m; i++) { 811 for (j=B->i[i]; j<B->i[i+1]; j++) { 812 if ( (col = garray[B->j[j]]) > cstart) break; 813 column_indices[cnt++] = col; 814 } 815 for (k=A->i[i]; k<A->i[i+1]; k++) { 816 column_indices[cnt++] = A->j[k] + cstart; 817 } 818 for (; j<B->i[i+1]; j++) { 819 column_indices[cnt++] = garray[B->j[j]]; 820 } 821 } 822 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 823 824 /* store the column indices to the file */ 825 if (!rank) { 826 MPI_Status status; 827 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 828 for (i=1; i<size; i++) 
{ 829 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 830 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 831 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 832 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 833 } 834 } else { 835 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 836 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(column_indices);CHKERRQ(ierr); 839 840 /* load up the local column values */ 841 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 842 cnt = 0; 843 for (i=0; i<mat->m; i++) { 844 for (j=B->i[i]; j<B->i[i+1]; j++) { 845 if ( garray[B->j[j]] > cstart) break; 846 column_values[cnt++] = B->a[j]; 847 } 848 for (k=A->i[i]; k<A->i[i+1]; k++) { 849 column_values[cnt++] = A->a[k]; 850 } 851 for (; j<B->i[i+1]; j++) { 852 column_values[cnt++] = B->a[j]; 853 } 854 } 855 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 856 857 /* store the column values to the file */ 858 if (!rank) { 859 MPI_Status status; 860 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 861 for (i=1; i<size; i++) { 862 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 863 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 864 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 865 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 866 } 867 } else { 868 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 869 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 870 } 871 ierr = PetscFree(column_values);CHKERRQ(ierr); 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ 
"MatView_MPIAIJ_ASCIIorDraworSocket" 877 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 878 { 879 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 880 PetscErrorCode ierr; 881 int rank = aij->rank,size = aij->size; 882 PetscTruth isdraw,iascii,flg,isbinary; 883 PetscViewer sviewer; 884 PetscViewerFormat format; 885 886 PetscFunctionBegin; 887 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 888 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 889 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 890 if (iascii) { 891 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 892 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 893 MatInfo info; 894 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 895 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 897 if (flg) { 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 899 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 900 } else { 901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 902 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 903 } 904 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 905 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 906 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 907 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 908 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 909 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } else if (format == 
PETSC_VIEWER_ASCII_INFO) { 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 if (!rank) { 942 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 943 } else { 944 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 PetscLogObjectParent(mat,A); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = aij->rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = 
aij->rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 #undef __FUNCT__ 1015 
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - Block Jacobi / local SOR relaxation for MPIAIJ matrices.

   Only the SOR_LOCAL_* variants are supported: each iteration scatters the
   current solution into mat->lvec, forms bb1 = bb - B*x (off-diagonal
   contribution removed from the right-hand side), then runs the requested
   sweep of the sequential relaxation on the diagonal block A.  True parallel
   (coupled) SOR is not implemented.
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  /* work vector for the modified right-hand side bb - B*x */
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-diagonal part contributes nothing, so the
         first iteration can use bb directly */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x (lvec is scaled by -1, then multadd) */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - Sums the statistics of the diagonal (A) and
   off-diagonal (B) blocks and, for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM,
   reduces them across the communicator.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  /* NOTE(review): columns_local is set to the GLOBAL column count N while
     rows_local uses the local m; all columns touch this process's rows, but
     confirm this asymmetry is intended rather than a typo for matin->n */
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies an option to the parallel matrix, forwarding
   it to the sequential A and B blocks where meaningful.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* forwarded to both sequential blocks */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    /* symmetry flags applied to the diagonal block only */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the parallel matrix,
   merging the diagonal (A) and off-diagonal (B) parts into a single list
   sorted by increasing global column number.  Uses (and lazily allocates)
   the per-matrix rowvalues/rowindices scratch buffers, so only one row may
   be active at a time (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int            nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int            *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices aliased after them */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  /* note: no semicolon needed after SETERRQ -- the macro supplies a return */
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request the pieces the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B columns strictly left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ();
   only clears the getrowactive guard (the scratch buffers are reused).
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes Frobenius, 1- (max column sum), or infinity-
   (max row sum) norm of the parallel matrix.  Each process accumulates over
   its A (diagonal) and B (off-diagonal) blocks, then the result is combined
   with an Allreduce.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      /* tmp holds per-global-column absolute sums; length N on every process */
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local row as a
   column of a new matrix B (MatSetValues with swapped row/column roles) and
   assembling.  In-place transpose (matout == PETSC_NULL) is only allowed for
   square matrices and is done via MatHeaderCopy().

   Note: the diagonal block's column indices Aloc->j are temporarily shifted
   to global numbering and must be shifted back before returning.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  int            M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* transposed sizes: local n x m, global N x M */
  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;  /* base pointer kept for PetscFree; cols is advanced below */
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr).  The scatter of
   rr into the local work vector is started first so communication overlaps
   the left-scaling and diagonal-block scaling below.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  int            s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
 */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ options help, on rank 0 only. */
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ matrices always have block size 1. */
PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Two MPIAIJ matrices are equal iff every process finds
   both its diagonal and off-diagonal blocks equal; combined with a logical
   AND reduction so all ranks return the same answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* short-circuit: only compare B parts when the A parts already match */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B blockwise when the nonzero patterns match
   and both use this implementation; otherwise falls back to MatCopy_Basic().
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* MatSetUpPreallocation_MPIAIJ - Default preallocation when the user gave none. */
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the
   values arrays are combined directly with BLAS axpy; SUBSET_NONZERO_PATTERN
   uses a cached X-to-Y index map for the off-diagonal block; anything else
   falls back to MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  int            one=1,i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str ==
SAME_NONZERO_PATTERN) {
    /* identical sparsity: axpy directly on both blocks' value arrays */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* invalidate a cached map that was built for a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Operation dispatch table for MATMPIAIJ.  The positions are fixed by the
   _MatOps layout -- do NOT reorder entries; a 0 means "not implemented". */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves the numerical values of both blocks for later retrieval. */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the values saved by MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode
MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Validates the requested per-row nonzero
   counts and forwards them to the sequential diagonal (A) and off-diagonal
   (B) blocks.  PETSC_DEFAULT/PETSC_DECIDE fall back to 5 diagonal and 2
   off-diagonal nonzeros per row.
*/
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  int            i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Constructor registered for the "mpiaij" type.  Sets up
   the operation table, splits ownership of rows/columns across the
   communicator, builds the ownership-range tables, creates the stash for
   off-process entries, and creates the two sequential blocks A (m x n,
   diagonal part) and B (m x N, off-diagonal part, columns compressed later).
*/
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  int            i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every process's local row count, then prefix-sum into ranges */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  /* register type-specific entry points queried via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 EXTERN_C_END 1858 1859 /*MC 1860 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1861 1862 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1863 and MATMPIAIJ otherwise. As a result, for single process communicators, 1864 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1865 for communicators controlling multiple processes. It is recommended that you call both of 1866 the above preallocation routines for simplicity. 1867 1868 Options Database Keys: 1869 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1870 1871 Level: beginner 1872 1873 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1874 M*/ 1875 1876 EXTERN_C_BEGIN 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatCreate_AIJ" 1879 PetscErrorCode MatCreate_AIJ(Mat A) 1880 { 1881 PetscErrorCode ierr; 1882 int size; 1883 1884 PetscFunctionBegin; 1885 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1886 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1887 if (size == 1) { 1888 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1889 } else { 1890 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1891 } 1892 PetscFunctionReturn(0); 1893 } 1894 EXTERN_C_END 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1899 { 1900 Mat mat; 1901 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 *newmat = 0; 1906 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1907 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1908 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1909 a = (Mat_MPIAIJ*)mat->data; 1910 1911 mat->factor = matin->factor; 1912 
mat->assembled = PETSC_TRUE; 1913 mat->insertmode = NOT_SET_VALUES; 1914 mat->preallocated = PETSC_TRUE; 1915 1916 a->rstart = oldmat->rstart; 1917 a->rend = oldmat->rend; 1918 a->cstart = oldmat->cstart; 1919 a->cend = oldmat->cend; 1920 a->size = oldmat->size; 1921 a->rank = oldmat->rank; 1922 a->donotstash = oldmat->donotstash; 1923 a->roworiented = oldmat->roworiented; 1924 a->rowindices = 0; 1925 a->rowvalues = 0; 1926 a->getrowactive = PETSC_FALSE; 1927 1928 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1929 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1930 if (oldmat->colmap) { 1931 #if defined (PETSC_USE_CTABLE) 1932 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1933 #else 1934 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1935 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1936 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1937 #endif 1938 } else a->colmap = 0; 1939 if (oldmat->garray) { 1940 int len; 1941 len = oldmat->B->n; 1942 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1943 PetscLogObjectMemory(mat,len*sizeof(int)); 1944 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1945 } else a->garray = 0; 1946 1947 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1948 PetscLogObjectParent(mat,a->lvec); 1949 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1950 PetscLogObjectParent(mat,a->Mvctx); 1951 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1952 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1953 PetscLogObjectParent(mat,a->A); 1954 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1955 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1956 PetscLogObjectParent(mat,a->B); 1957 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1958 *newmat = mat; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #include 
#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format from a viewer
   into a new parallel AIJ matrix.

   Only rank 0 reads the file.  The file layout read here is: a 4-int header
   (cookie, M, N, header[3] -- header[3] < 0 flags a special on-disk format;
   presumably it is the total nonzero count, confirm against the binary
   writer), then M row lengths, then all column indices, then all values.
   Rank 0 scatters row lengths, then ships each rank its column indices and,
   in a second pass, its values via point-to-point messages.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  int            i,nz,j,rstart,rend,fd;
  int            header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int            tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: M/size each, with the remainder
     distributed one extra row apiece to the lowest ranks */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  /* gather counts into rowners[1..size]; prefix-sum starts at i=2 because
     rowners[1] already holds rank 0's count */
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array: displacement of rank i's
       rows is exactly the global index of its first row */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* maxnz is reused here as a scratch for the received count */
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    /* inclusive prefix sum of local column counts gives this rank's cend */
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily make ourlens hold only the DIAGONAL-part
     counts so ourlens/offlens can feed the preallocation directly */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* columns within each file row are stored sorted, so tell the assembly */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  /* restore ourlens to full per-row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, one global row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out (note: values use the
       matrix tag A->tag, distinct from the viewer tag used for indices) */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
/*
   MatGetSubMatrix_MPIAIJ - Extracts a parallel submatrix.

   Phase 1: each rank extracts its local rows/columns as a SEQAIJ matrix via
   MatGetSubMatrices() (cached on the result under the key "SubMatrix" so a
   MAT_REUSE_MATRIX call can find it again).  Phase 2: the local pieces are
   concatenated into a new parallel matrix by re-inserting every local row.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  int            i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the local submatrix cached on the previous result */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix: reuse the row split */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of column counts gives this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* the last rank sees the global total, so it validates the sum */
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths (olens shares the dlens allocation) */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* walk the local CSR arrays of Mreuse and insert row-by-row at the
     corresponding global row numbers */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; the compose takes a
     reference, so drop ours to hand ownership to M */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.
   o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For e.g: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  /* dispatch to the implementation composed on the object by its type
     constructor; if the matrix type did not compose it (i.e. it is not an
     MPIAIJ matrix) this call is silently a no-op */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor than it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For e.g: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* a one-process communicator yields a plain SEQAIJ matrix (see Notes) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   MatMPIAIJGetSeqAIJ - Gives the caller direct access to the internal
   diagonal (Ad) and off-diagonal (Ao) SEQAIJ blocks of an MPIAIJ matrix,
   plus garray, the map from Ao's local column numbers to global column
   numbers.  The references are borrowed: nothing is copied or reference
   counted here, so the caller must not destroy them.
*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Attaches a coloring to the two SEQAIJ blocks.

   IS_COLORING_LOCAL: the coloring indexes owned columns only, so the colors
   for the off-diagonal (ghost) columns are obtained by gathering all colors
   and indexing with garray.  IS_COLORING_GHOSTED: the coloring indexes local
   (ghosted) numbering, so both blocks map their columns through the matrix's
   global-to-local mapping first.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  int            i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather every rank's colors,
       then pick out those of our ghost columns via garray */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map the owned global column
       numbers (cstart..cstart+n-1) to local ghosted numbering */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: ghost columns are already
       listed globally in garray; map them to local ghosted numbering */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode
MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2681 { 2682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2683 PetscErrorCode ierr; 2684 2685 PetscFunctionBegin; 2686 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2687 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2688 PetscFunctionReturn(0); 2689 } 2690 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2693 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2694 { 2695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2696 PetscErrorCode ierr; 2697 2698 PetscFunctionBegin; 2699 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2700 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2701 PetscFunctionReturn(0); 2702 } 2703 2704 #undef __FUNCT__ 2705 #define __FUNCT__ "MatMerge" 2706 /*@C 2707 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2708 matrices from each processor 2709 2710 Collective on MPI_Comm 2711 2712 Input Parameters: 2713 + comm - the communicators the parallel matrix will live on 2714 . inmat - the input sequential matrices 2715 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2716 2717 Output Parameter: 2718 . outmat - the parallel matrix generated 2719 2720 Level: advanced 2721 2722 Notes: The number of columns of the matrix in EACH of the seperate files 2723 MUST be the same. 
2724 2725 @*/ 2726 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat) 2727 { 2728 PetscErrorCode ierr; 2729 int m,n,i,rstart,nnz,I,*dnz,*onz; 2730 const int *indx; 2731 const PetscScalar *values; 2732 PetscMap columnmap,rowmap; 2733 2734 PetscFunctionBegin; 2735 2736 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2737 if (scall == MAT_INITIAL_MATRIX){ 2738 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2739 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2740 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2741 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2742 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2743 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2744 2745 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2746 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2747 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2748 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2749 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2750 2751 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2752 for (i=0;i<m;i++) { 2753 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2754 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2755 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2756 } 2757 /* This routine will ONLY return MPIAIJ type matrix */ 2758 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2759 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2760 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2761 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2762 2763 } else if (scall == MAT_REUSE_MATRIX){ 2764 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2765 } else { 2766 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2767 } 2768 2769 for (i=0;i<m;i++) { 2770 ierr = 
MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2771 I = i + rstart; 2772 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2773 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2774 } 2775 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2776 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2777 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2778 #ifdef OLD 2779 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2780 2781 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2782 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2783 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2784 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2785 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2786 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2787 2788 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2789 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2790 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2791 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2792 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2793 2794 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2795 for (i=0;i<m;i++) { 2796 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2797 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2798 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2799 } 2800 /* This routine will ONLY return MPIAIJ type matrix */ 2801 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2802 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2803 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2804 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2805 2806 for (i=0;i<m;i++) { 2807 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2808 I = i + rstart; 2809 ierr = 
MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2810 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2811 } 2812 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2813 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 #endif 2816 PetscFunctionReturn(0); 2817 } 2818 2819 #undef __FUNCT__ 2820 #define __FUNCT__ "MatFileSplit" 2821 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2822 { 2823 PetscErrorCode ierr; 2824 int rank,m,N,i,rstart,nnz; 2825 size_t len; 2826 const int *indx; 2827 PetscViewer out; 2828 char *name; 2829 Mat B; 2830 const PetscScalar *values; 2831 2832 PetscFunctionBegin; 2833 2834 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2835 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2836 /* Should this be the type of the diagonal block of A? */ 2837 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2838 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2839 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2840 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2841 for (i=0;i<m;i++) { 2842 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2843 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2844 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2845 } 2846 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2847 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2848 2849 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2850 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2851 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2852 sprintf(name,"%s.%d",outfile,rank); 2853 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2854 ierr = PetscFree(name); 2855 ierr = MatView(B,out);CHKERRQ(ierr); 2856 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2857 ierr = MatDestroy(B);CHKERRQ(ierr); 2858 
PetscFunctionReturn(0); 2859 } 2860