#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash-table lookup cost; without it each processor
  holds an integer array of length N (the global number of columns), which
  is not scalable in memory but is fast to access. For example, if
  garray = {3,7,11} then global column 7 is local column 1 of the
  off-diagonal block, so colmap records 1+1 = 2 (0 is reserved to mean
  "column not present on this process").
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (PetscInt*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(PetscInt));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = 
aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 278 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 PetscMPIInt n; 351 PetscInt i,j,rstart,ncols,flg; 352 PetscInt *row,*col,other_disassembled; 353 PetscScalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for (i=0; i<n;) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
*/ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 /* reaccess the b because aij->B was changed */ 389 b = (Mat_SeqAIJ *)aij->B->data; 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 404 /* used by MatAXPY() */ 405 a->xtoy = 0; b->xtoy = 0; 406 a->XtoY = 0; b->XtoY = 0; 407 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 414 { 415 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroRows_MPIAIJ" 426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 PetscErrorCode ierr; 430 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 431 PetscInt i,N,*rows,*owners = l->rowners; 432 PetscInt *nprocs,j,idx,nsends,row; 433 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 434 PetscInt *rvalues,count,base,slen,*source; 435 PetscInt *lens,*lrows,*values,rstart=l->rstart; 436 MPI_Comm comm = A->comm; 437 MPI_Request *send_waits,*recv_waits; 438 MPI_Status recv_status,*send_status; 439 IS istmp; 440 PetscTruth found; 441 442 PetscFunctionBegin; 443 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 445 446 /* first count number of contributors to each processor */ 447 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 448 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 450 for (i=0; i<N; i++) { 451 idx = rows[i]; 452 found = PETSC_FALSE; 453 for (j=0; j<size; j++) { 454 if (idx >= owners[j] && idx < owners[j+1]) { 455 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 456 } 457 } 458 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 459 } 460 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 461 462 /* inform other processors of number of messages and max length*/ 463 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 464 465 /* post receives: */ 466 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 468 for (i=0; i<nrecvs; i++) { 469 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 470 } 471 472 /* do sends: 473 1) starts[i] gives the starting index in svalues for stuff going to 474 the ith processor 475 */ 476 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 477 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 478 
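  /*
     Illustrative sketch (hypothetical numbers, not additional code): with
     size = 3 and rows destined for processes {1,0,1,2}, the even entries of
     nprocs hold the counts {1,2,1}, so starts becomes {0,1,3} and svalues is
     packed as [row for 0 | rows for 1 | row for 2]; starts[owner[i]]++ below
     advances the cursor of the owning process as each row is copied in.
  */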
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, triggering many mallocs and slowing
     things down.
536 537 Contributed by: Mathew Knepley 538 */ 539 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 540 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 541 if (diag && (l->A->M == l->A->N)) { 542 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 543 } else if (diag) { 544 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 545 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 546 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 548 } 549 for (i = 0; i < slen; i++) { 550 row = lrows[i] + rstart; 551 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } else { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 } 558 ierr = ISDestroy(istmp);CHKERRQ(ierr); 559 ierr = PetscFree(lrows);CHKERRQ(ierr); 560 561 /* wait on sends */ 562 if (nsends) { 563 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 564 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 565 ierr = PetscFree(send_status);CHKERRQ(ierr); 566 } 567 ierr = PetscFree(send_waits);CHKERRQ(ierr); 568 ierr = PetscFree(svalues);CHKERRQ(ierr); 569 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatMult_MPIAIJ" 575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 576 { 577 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 578 PetscErrorCode ierr; 579 PetscInt nt; 580 581 PetscFunctionBegin; 582 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 583 if (nt != A->n) { 584 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 585 } 586 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 588 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 589 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultAdd_MPIAIJ" 595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 /* do nondiagonal part */ 617 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 618 /* send it on its way */ 619 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 620 /* do local part */ 621 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 622 /* receive remote parts: note this assumes the values are not actually */ 623 /* inserted in yy until the next line, which is true for my implementation*/ 624 /* but is not perhaps always true. 
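     (Loosely: each process computes its diagonal-block product A_loc^T*xx
     locally, while the off-diagonal products B_loc^T*xx are accumulated into
     their owning processes by the reverse scatter-add below.)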
*/ 625 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 EXTERN_C_BEGIN 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 633 { 634 MPI_Comm comm; 635 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 636 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 637 IS Me,Notme; 638 PetscErrorCode ierr; 639 PetscInt M,N,first,last,*notme,i; 640 PetscMPIInt size; 641 642 PetscFunctionBegin; 643 644 /* Easy test: symmetric diagonal block */ 645 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 646 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 647 if (!*f) PetscFunctionReturn(0); 648 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 649 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 650 if (size == 1) PetscFunctionReturn(0); 651 652 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 653 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 654 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 655 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 656 for (i=0; i<first; i++) notme[i] = i; 657 for (i=last; i<M; i++) notme[i-last+first] = i; 658 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 659 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 660 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 661 Aoff = Aoffs[0]; 662 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 663 Boff = Boffs[0]; 664 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 665 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 666 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 667 ierr = ISDestroy(Me);CHKERRQ(ierr); 668 ierr = ISDestroy(Notme);CHKERRQ(ierr); 669 670 PetscFunctionReturn(0); 671 } 672 EXTERN_C_END 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 PetscErrorCode ierr; 680 681 PetscFunctionBegin; 682 /* do nondiagonal part */ 683 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 684 /* send it on its way */ 685 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 686 /* do local part */ 687 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 688 /* receive remote parts: note this assumes the values are not actually */ 689 /* inserted in yy until the next line, which is true for my implementation*/ 690 /* but is not perhaps always true. 
*/ 691 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 /* 696 This only works correctly for square matrices where the subblock A->A is the 697 diagonal block 698 */ 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 702 { 703 PetscErrorCode ierr; 704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 705 706 PetscFunctionBegin; 707 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 708 if (a->rstart != a->cstart || a->rend != a->cend) { 709 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 710 } 711 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatScale_MPIAIJ" 717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 722 PetscFunctionBegin; 723 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 724 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatDestroy_MPIAIJ" 730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 731 { 732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 #if defined(PETSC_USE_LOG) 737 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 738 #endif 739 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 740 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 741 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 742 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 743 #if defined (PETSC_USE_CTABLE) 744 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 745 #else 746 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 747 #endif 748 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 749 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 750 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 751 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 752 ierr = PetscFree(aij);CHKERRQ(ierr); 753 754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatView_MPIAIJ_Binary" 766 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 767 { 768 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 769 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 770 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 771 PetscErrorCode ierr; 772 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 773 int fd; 774 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 775 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 776 PetscScalar 
*column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (;
j<B->i[i+1]; j++) { 867 column_values[cnt++] = B->a[j]; 868 } 869 } 870 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 871 872 /* store the column values to the file */ 873 if (!rank) { 874 MPI_Status status; 875 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 876 for (i=1; i<size; i++) { 877 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 878 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 879 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 880 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 881 } 882 } else { 883 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 884 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 885 } 886 ierr = PetscFree(column_values);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 892 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 893 { 894 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 895 PetscErrorCode ierr; 896 PetscMPIInt rank = aij->rank,size = aij->size; 897 PetscTruth isdraw,iascii,flg,isbinary; 898 PetscViewer sviewer; 899 PetscViewerFormat format; 900 901 PetscFunctionBegin; 902 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 904 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 905 if (iascii) { 906 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 907 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 908 MatInfo info; 909 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 910 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 911 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 912 if (flg) { 913 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 914 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 915 } else { 916 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 917 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 918 } 919 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 920 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 921 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 922 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 923 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 924 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } else if (format == PETSC_VIEWER_ASCII_INFO) { 927 PetscFunctionReturn(0); 928 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 929 PetscFunctionReturn(0); 930 } 931 } else if (isbinary) { 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 937 } 938 PetscFunctionReturn(0); 939 } else if (isdraw) { 940 
PetscDraw draw; 941 PetscTruth isnull; 942 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 943 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 944 } 945 946 if (size == 1) { 947 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 948 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 949 } else { 950 /* assemble the entire matrix onto first processor. */ 951 Mat A; 952 Mat_SeqAIJ *Aloc; 953 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 954 PetscScalar *a; 955 956 if (!rank) { 957 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 958 } else { 959 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 960 } 961 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 962 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 963 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 964 PetscLogObjectParent(mat,A); 965 966 /* copy over the A part */ 967 Aloc = (Mat_SeqAIJ*)aij->A->data; 968 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 969 row = aij->rstart; 970 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 974 } 975 aj = Aloc->j; 976 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 977 978 /* copy over the B part */ 979 Aloc = (Mat_SeqAIJ*)aij->B->data; 980 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 981 row = aij->rstart; 982 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 983 ct = cols; 984 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 985 for (i=0; i<m; i++) { 986 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 987 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 988 } 989 ierr = PetscFree(ct);CHKERRQ(ierr); 990 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 992 /* 993 Everyone has to call to draw the matrix since the graphics waits are 994 synchronized across all processors that share the PetscDraw object 995 */ 996 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 997 if (!rank) { 998 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 999 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1000 } 1001 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1002 ierr = MatDestroy(A);CHKERRQ(ierr); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatView_MPIAIJ" 1009 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1010 { 1011 PetscErrorCode ierr; 1012 PetscTruth iascii,isdraw,issocket,isbinary; 1013 1014 PetscFunctionBegin; 1015 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1016 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1017 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1018 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1019 if (iascii || isdraw || isbinary || issocket) { 1020 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1021 } else { 1022 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 1028 1029 #undef 
__FUNCT__ 1030 #define __FUNCT__ "MatRelax_MPIAIJ" 1031 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1032 { 1033 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1034 PetscErrorCode ierr; 1035 Vec bb1; 1036 PetscScalar mone=-1.0; 1037 1038 PetscFunctionBegin; 1039 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1040 1041 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1042 1043 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1044 if (flag & SOR_ZERO_INITIAL_GUESS) { 1045 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1046 its--; 1047 } 1048 1049 while (its--) { 1050 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 1053 /* update rhs: bb1 = bb - B*x */ 1054 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1056 1057 /* local sweep */ 1058 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1059 CHKERRQ(ierr); 1060 } 1061 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 while (its--) { 1067 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 1070 /* update rhs: bb1 = bb - B*x */ 1071 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1072 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1073 1074 /* local sweep */ 1075 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1076 CHKERRQ(ierr); 1077 } 1078 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1079 if (flag & SOR_ZERO_INITIAL_GUESS) { 1080 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1081 its--; 1082 } 1083 while (its--) { 1084 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1085 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1086 1087 /* update rhs: bb1 = bb - B*x */ 1088 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1089 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1090 1091 /* local sweep */ 1092 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1093 CHKERRQ(ierr); 1094 } 1095 } else { 1096 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1097 } 1098 1099 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1105 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1106 { 1107 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1108 Mat A = mat->A,B = mat->B; 1109 PetscErrorCode ierr; 1110 PetscReal isend[5],irecv[5]; 1111 1112 PetscFunctionBegin; 1113 info->block_size = 1.0; 1114 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1115 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1116 isend[3] = info->memory; isend[4] = info->mallocs; 1117 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1118 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1119 isend[3] += info->memory; isend[4] += info->mallocs; 1120 if (flag == MAT_LOCAL) { 1121 info->nz_used = isend[0]; 1122 info->nz_allocated = isend[1]; 1123 info->nz_unneeded = isend[2]; 1124 info->memory = isend[3]; 1125 info->mallocs = isend[4]; 1126 } else if (flag == MAT_GLOBAL_MAX) { 1127 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1128 info->nz_used = irecv[0]; 1129 info->nz_allocated = irecv[1]; 1130 info->nz_unneeded = irecv[2]; 1131 info->memory = irecv[3]; 1132 info->mallocs = irecv[4]; 1133 } else if (flag == MAT_GLOBAL_SUM) { 1134 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1135 info->nz_used = irecv[0]; 1136 info->nz_allocated = irecv[1]; 1137 info->nz_unneeded = irecv[2]; 1138 info->memory = irecv[3]; 1139 info->mallocs = irecv[4]; 1140 } 1141 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1142 info->fill_ratio_needed = 0; 1143 info->factor_mallocs = 0; 1144 info->rows_global = (double)matin->M; 1145 info->columns_global = (double)matin->N; 1146 info->rows_local = (double)matin->m; 1147 info->columns_local = (double)matin->N; 1148 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatSetOption_MPIAIJ" 1154 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1155 { 1156 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1157 PetscErrorCode ierr; 1158 1159 PetscFunctionBegin; 1160 switch (op) { 1161 case MAT_NO_NEW_NONZERO_LOCATIONS: 1162 case MAT_YES_NEW_NONZERO_LOCATIONS: 1163 case MAT_COLUMNS_UNSORTED: 1164 case MAT_COLUMNS_SORTED: 1165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1166 case MAT_KEEP_ZEROED_ROWS: 1167 case MAT_NEW_NONZERO_LOCATION_ERR: 1168 case MAT_USE_INODES: 1169 case MAT_DO_NOT_USE_INODES: 1170 case MAT_IGNORE_ZERO_ENTRIES: 1171 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1172 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1173 break; 1174 case MAT_ROW_ORIENTED: 1175 a->roworiented = PETSC_TRUE; 1176 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1177 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1178 break; 1179 case MAT_ROWS_SORTED: 1180 case MAT_ROWS_UNSORTED: 1181 case MAT_YES_NEW_DIAGONALS: 1182 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1183 break; 1184 case MAT_COLUMN_ORIENTED: 1185 a->roworiented = PETSC_FALSE; 1186 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1187 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1188 break; 1189 case MAT_IGNORE_OFF_PROC_ENTRIES: 1190 a->donotstash = PETSC_TRUE; 1191 break; 1192 case MAT_NO_NEW_DIAGONALS: 1193 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1194 case MAT_SYMMETRIC: 1195 case MAT_STRUCTURALLY_SYMMETRIC: 1196 case MAT_HERMITIAN: 1197 case MAT_SYMMETRY_ETERNAL: 1198 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1199 break; 1200 case MAT_NOT_SYMMETRIC: 1201 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1202 case MAT_NOT_HERMITIAN: 1203 case MAT_NOT_SYMMETRY_ETERNAL: 1204 break; 1205 default: 1206 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatGetRow_MPIAIJ" 1213 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1214 { 1215 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1216 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1217 PetscErrorCode ierr; 1218 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1219 PetscInt 
nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1220 PetscInt *cmap,*idx_p; 1221 1222 PetscFunctionBegin; 1223 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1224 mat->getrowactive = PETSC_TRUE; 1225 1226 if (!mat->rowvalues && (idx || v)) { 1227 /* 1228 allocate enough space to hold information from the longest row. 1229 */ 1230 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1231 PetscInt max = 1,tmp; 1232 for (i=0; i<matin->m; i++) { 1233 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1234 if (max < tmp) { max = tmp; } 1235 } 1236 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1237 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1238 } 1239 1240 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1241 lrow = row - rstart; 1242 1243 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1244 if (!v) {pvA = 0; pvB = 0;} 1245 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1246 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1247 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1248 nztot = nzA + nzB; 1249 1250 cmap = mat->garray; 1251 if (v || idx) { 1252 if (nztot) { 1253 /* Sort by increasing column numbers, assuming A and B already sorted */ 1254 PetscInt imark = -1; 1255 if (v) { 1256 *v = v_p = mat->rowvalues; 1257 for (i=0; i<nzB; i++) { 1258 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1259 else break; 1260 } 1261 imark = i; 1262 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1263 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1264 } 1265 if (idx) { 1266 *idx = idx_p = mat->rowindices; 1267 if (imark > -1) { 1268 for (i=0; i<imark; i++) { 1269 idx_p[i] = cmap[cworkB[i]]; 1270 } 1271 } else { 1272 for (i=0; i<nzB; i++) { 1273 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1274 else break; 1275 } 1276 imark = i; 1277 } 1278 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1279 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1280 } 1281 } else { 1282 if (idx) *idx = 0; 1283 if (v) *v = 0; 1284 } 1285 } 1286 *nz = nztot; 1287 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1288 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1294 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 1298 PetscFunctionBegin; 1299 if (aij->getrowactive == PETSC_FALSE) { 1300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1301 } 1302 aij->getrowactive = PETSC_FALSE; 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatNorm_MPIAIJ" 1308 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1312 PetscErrorCode ierr; 1313 PetscInt i,j,cstart = aij->cstart; 1314 PetscReal sum = 0.0; 1315 PetscScalar *v; 1316 1317 PetscFunctionBegin; 1318 if (aij->size == 1) { 1319 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1320 } else { 1321 if (type == NORM_FROBENIUS) { 1322 v = amat->a; 1323 for (i=0; i<amat->nz; i++) { 1324 #if defined(PETSC_USE_COMPLEX) 1325 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 
1326 #else 1327 sum += (*v)*(*v); v++; 1328 #endif 1329 } 1330 v = bmat->a; 1331 for (i=0; i<bmat->nz; i++) { 1332 #if defined(PETSC_USE_COMPLEX) 1333 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1334 #else 1335 sum += (*v)*(*v); v++; 1336 #endif 1337 } 1338 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1339 *norm = sqrt(*norm); 1340 } else if (type == NORM_1) { /* max column norm */ 1341 PetscReal *tmp,*tmp2; 1342 PetscInt *jj,*garray = aij->garray; 1343 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1344 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1345 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1346 *norm = 0.0; 1347 v = amat->a; jj = amat->j; 1348 for (j=0; j<amat->nz; j++) { 1349 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1350 } 1351 v = bmat->a; jj = bmat->j; 1352 for (j=0; j<bmat->nz; j++) { 1353 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1354 } 1355 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1356 for (j=0; j<mat->N; j++) { 1357 if (tmp2[j] > *norm) *norm = tmp2[j]; 1358 } 1359 ierr = PetscFree(tmp);CHKERRQ(ierr); 1360 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1361 } else if (type == NORM_INFINITY) { /* max row norm */ 1362 PetscReal ntemp = 0.0; 1363 for (j=0; j<aij->A->m; j++) { 1364 v = amat->a + amat->i[j]; 1365 sum = 0.0; 1366 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1367 sum += PetscAbsScalar(*v); v++; 1368 } 1369 v = bmat->a + bmat->i[j]; 1370 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1371 sum += PetscAbsScalar(*v); v++; 1372 } 1373 if (sum > ntemp) ntemp = sum; 1374 } 1375 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1376 } else { 1377 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1378 } 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatTranspose_MPIAIJ" 1385 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1386 { 1387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1388 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1389 PetscErrorCode ierr; 1390 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1391 Mat B; 1392 PetscScalar *array; 1393 1394 PetscFunctionBegin; 1395 if (!matout && M != N) { 1396 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1397 } 1398 1399 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1400 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1401 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1402 1403 /* copy over the A part */ 1404 Aloc = (Mat_SeqAIJ*)a->A->data; 1405 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1406 row = a->rstart; 1407 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1408 for (i=0; i<m; i++) { 1409 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1410 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1411 } 1412 aj = Aloc->j; 1413 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1414 1415 /* copy over the B part */ 1416 Aloc = (Mat_SeqAIJ*)a->B->data; 1417 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1418 row = a->rstart; 1419 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1420 ct = cols; 1421 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1422 for (i=0; i<m; i++) { 1423 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1424 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1425 } 1426 ierr = PetscFree(ct);CHKERRQ(ierr); 1427 ierr = 
MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 if (matout) { 1430 *matout = B; 1431 } else { 1432 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1433 } 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1439 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1440 { 1441 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1442 Mat a = aij->A,b = aij->B; 1443 PetscErrorCode ierr; 1444 PetscInt s1,s2,s3; 1445 1446 PetscFunctionBegin; 1447 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1448 if (rr) { 1449 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1450 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1451 /* Overlap communication with computation. */ 1452 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1453 } 1454 if (ll) { 1455 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1456 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1457 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1458 } 1459 /* scale the diagonal block */ 1460 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1461 1462 if (rr) { 1463 /* Do a scatter end and then right scale the off-diagonal block */ 1464 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1465 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1466 } 1467 1468 PetscFunctionReturn(0); 1469 } 1470 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1474 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1475 { 1476 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 1479 PetscFunctionBegin; 1480 if (!a->rank) { 1481 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1488 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs) 1489 { 1490 PetscFunctionBegin; 1491 *bs = 1; 1492 PetscFunctionReturn(0); 1493 } 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1496 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1497 { 1498 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatEqual_MPIAIJ" 1508 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1509 { 1510 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1511 Mat a,b,c,d; 1512 PetscTruth flg; 1513 PetscErrorCode ierr; 1514 1515 PetscFunctionBegin; 1516 a = matA->A; b = matA->B; 1517 c = matB->A; d = matB->B; 1518 1519 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1520 if (flg == PETSC_TRUE) { 1521 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1522 } 1523 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatCopy_MPIAIJ" 1529 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1530 { 1531 PetscErrorCode ierr; 1532 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1533 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1534 1535 PetscFunctionBegin; 1536 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1537 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1538 /* because of the column compression in the off-processor part of the matrix a->B, 1539 the number of columns in a->B and b->B may be different, hence we cannot call 1540 the MatCopy() directly on the two parts. If need be, we can provide a more 1541 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1542 then copying the submatrices */ 1543 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1544 } else { 1545 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1546 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1553 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1554 { 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #include "petscblaslapack.h" 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatAXPY_MPIAIJ" 1565 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1566 { 1567 PetscErrorCode ierr; 1568 PetscInt i; 1569 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1570 PetscBLASInt bnz,one=1; 1571 Mat_SeqAIJ *x,*y; 1572 1573 PetscFunctionBegin; 1574 if (str == SAME_NONZERO_PATTERN) { 1575 x = (Mat_SeqAIJ *)xx->A->data; 1576 y = (Mat_SeqAIJ *)yy->A->data; 1577 bnz = (PetscBLASInt)x->nz; 1578 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1579 x = (Mat_SeqAIJ *)xx->B->data; 1580 y = (Mat_SeqAIJ *)yy->B->data; 1581 bnz = (PetscBLASInt)x->nz; 1582 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1583 } else if (str == SUBSET_NONZERO_PATTERN) { 1584 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1585 1586 x = (Mat_SeqAIJ *)xx->B->data; 1587 y = (Mat_SeqAIJ *)yy->B->data; 1588 if (y->xtoy && y->XtoY != xx->B) { 1589 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1590 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1591 } 1592 if (!y->xtoy) { /* get xtoy */ 1593 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1594 y->XtoY = xx->B; 1595 } 1596 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1597 } else { 1598 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 /* -------------------------------------------------------------------*/ 1604 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1605 MatGetRow_MPIAIJ, 1606 MatRestoreRow_MPIAIJ, 1607 MatMult_MPIAIJ, 1608 /* 4*/ MatMultAdd_MPIAIJ, 1609 MatMultTranspose_MPIAIJ, 1610 MatMultTransposeAdd_MPIAIJ, 1611 0, 1612 0, 1613 0, 1614 /*10*/ 0, 1615 0, 1616 0, 1617 MatRelax_MPIAIJ, 1618 MatTranspose_MPIAIJ, 1619 /*15*/ MatGetInfo_MPIAIJ, 1620 MatEqual_MPIAIJ, 1621 MatGetDiagonal_MPIAIJ, 1622 MatDiagonalScale_MPIAIJ, 1623 MatNorm_MPIAIJ, 1624 /*20*/ MatAssemblyBegin_MPIAIJ, 1625 MatAssemblyEnd_MPIAIJ, 1626 0, 1627 MatSetOption_MPIAIJ, 1628 MatZeroEntries_MPIAIJ, 1629 /*25*/ MatZeroRows_MPIAIJ, 1630 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT) 1631 MatLUFactorSymbolic_MPIAIJ_TFS, 1632 #else 1633 0, 1634 #endif 1635 0, 1636 0, 1637 0, 1638 /*30*/ MatSetUpPreallocation_MPIAIJ, 1639 0, 1640 0, 1641 0, 1642 0, 1643 /*35*/ MatDuplicate_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 0, 1648 /*40*/ MatAXPY_MPIAIJ, 1649 MatGetSubMatrices_MPIAIJ, 1650 MatIncreaseOverlap_MPIAIJ, 1651 
MatGetValues_MPIAIJ, 1652 MatCopy_MPIAIJ, 1653 /*45*/ MatPrintHelp_MPIAIJ, 1654 MatScale_MPIAIJ, 1655 0, 1656 0, 1657 0, 1658 /*50*/ MatGetBlockSize_MPIAIJ, 1659 0, 1660 0, 1661 0, 1662 0, 1663 /*55*/ MatFDColoringCreate_MPIAIJ, 1664 0, 1665 MatSetUnfactored_MPIAIJ, 1666 0, 1667 0, 1668 /*60*/ MatGetSubMatrix_MPIAIJ, 1669 MatDestroy_MPIAIJ, 1670 MatView_MPIAIJ, 1671 MatGetPetscMaps_Petsc, 1672 0, 1673 /*65*/ 0, 1674 0, 1675 0, 1676 0, 1677 0, 1678 /*70*/ 0, 1679 0, 1680 MatSetColoring_MPIAIJ, 1681 MatSetValuesAdic_MPIAIJ, 1682 MatSetValuesAdifor_MPIAIJ, 1683 /*75*/ 0, 1684 0, 1685 0, 1686 0, 1687 0, 1688 /*80*/ 0, 1689 0, 1690 0, 1691 0, 1692 /*84*/ MatLoad_MPIAIJ, 1693 0, 1694 0, 1695 0, 1696 0, 1697 0, 1698 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1699 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1700 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1701 MatPtAP_MPIAIJ_MPIAIJ, 1702 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1703 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1704 0, 1705 0, 1706 0}; 1707 1708 /* ----------------------------------------------------------------------------------------*/ 1709 1710 EXTERN_C_BEGIN 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1713 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1714 { 1715 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1720 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 EXTERN_C_BEGIN 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1728 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1729 { 1730 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1735 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 EXTERN_C_END 1739 1740 #include "petscpc.h" 1741 EXTERN_C_BEGIN 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1744 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1745 { 1746 Mat_MPIAIJ *b; 1747 PetscErrorCode ierr; 1748 PetscInt i; 1749 1750 PetscFunctionBegin; 1751 B->preallocated = PETSC_TRUE; 1752 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1753 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1754 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1755 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1756 if (d_nnz) { 1757 for (i=0; i<B->m; i++) { 1758 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1759 } 1760 } 1761 if (o_nnz) { 1762 for (i=0; i<B->m; i++) { 1763 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1764 } 1765 } 1766 b = (Mat_MPIAIJ*)B->data; 1767 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1768 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1769 1770 PetscFunctionReturn(0); 1771 } 1772 EXTERN_C_END 1773 1774 /*MC 1775 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1776 1777 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1778 and MATMPIAIJ otherwise. 
As a result, for single process communicators, 1779 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1780 for communicators controlling multiple processes. It is recommended that you call both of 1781 the above preallocation routines for simplicity. 1782 1783 Options Database Keys: 1784 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1785 1786 Level: beginner 1787 1788 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1789 M*/ 1790 1791 EXTERN_C_BEGIN 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatCreate_AIJ" 1794 PetscErrorCode MatCreate_AIJ(Mat A) 1795 { 1796 PetscErrorCode ierr; 1797 PetscMPIInt size; 1798 1799 PetscFunctionBegin; 1800 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1801 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1802 if (size == 1) { 1803 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 EXTERN_C_END 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1813 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1814 { 1815 Mat mat; 1816 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 *newmat = 0; 1821 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1822 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1823 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1824 a = (Mat_MPIAIJ*)mat->data; 1825 1826 mat->factor = matin->factor; 1827 mat->assembled = PETSC_TRUE; 1828 mat->insertmode = NOT_SET_VALUES; 1829 mat->preallocated = PETSC_TRUE; 1830 1831 a->rstart = oldmat->rstart; 1832 a->rend = oldmat->rend; 1833 a->cstart = oldmat->cstart; 1834 a->cend = oldmat->cend; 1835 a->size = oldmat->size; 1836 a->rank = oldmat->rank; 1837 a->donotstash = oldmat->donotstash; 1838 a->roworiented = oldmat->roworiented; 1839 a->rowindices = 0; 1840 a->rowvalues = 0; 1841 a->getrowactive = PETSC_FALSE; 1842 1843 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1844 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1845 if (oldmat->colmap) { 1846 #if defined (PETSC_USE_CTABLE) 1847 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1848 #else 1849 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1850 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1851 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1852 #endif 1853 } else a->colmap = 0; 1854 if (oldmat->garray) { 1855 PetscInt len; 1856 len = oldmat->B->n; 1857 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1858 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1859 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1860 } else a->garray = 0; 1861 1862 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1863 PetscLogObjectParent(mat,a->lvec); 1864 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1865 PetscLogObjectParent(mat,a->Mvctx); 1866 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1867 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1868 PetscLogObjectParent(mat,a->A); 1869 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1870 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1871 
PetscLogObjectParent(mat,a->B); 1872 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1873 *newmat = mat; 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #include "petscsys.h" 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatLoad_MPIAIJ" 1881 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1882 { 1883 Mat A; 1884 PetscScalar *vals,*svals; 1885 MPI_Comm comm = ((PetscObject)viewer)->comm; 1886 MPI_Status status; 1887 PetscErrorCode ierr; 1888 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm; 1889 PetscInt i,nz,j,rstart,rend; 1890 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1891 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1892 PetscInt cend,cstart,n; 1893 int fd; 1894 1895 PetscFunctionBegin; 1896 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1897 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1898 if (!rank) { 1899 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1900 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1901 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1902 if (header[3] < 0) { 1903 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1904 } 1905 } 1906 1907 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1908 M = header[1]; N = header[2]; 1909 /* determine ownership of all rows */ 1910 m = M/size + ((M % size) > rank); 1911 mm = (PetscMPIInt) m; 1912 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1913 ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1914 rowners[0] = 0; 1915 for (i=2; i<=size; i++) { 1916 rowners[i] += rowners[i-1]; 1917 } 1918 rstart = rowners[rank]; 1919 rend = rowners[rank+1]; 1920 1921 /* distribute row lengths to all processors */ 1922 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1923 offlens = ourlens + (rend-rstart); 1924 if (!rank) { 1925 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1926 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1927 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1928 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1929 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1930 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1931 } else { 1932 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1933 } 1934 1935 if (!rank) { 1936 /* calculate the number of nonzeros on each processor */ 1937 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1938 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (i=0; i<size; i++) { 1940 for (j=rowners[i]; j< rowners[i+1]; j++) { 1941 procsnz[i] += rowlengths[j]; 1942 } 1943 } 1944 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1945 1946 /* determine max buffer needed and allocate it */ 1947 maxnz = 0; 1948 for (i=0; i<size; i++) { 1949 maxnz = PetscMax(maxnz,procsnz[i]); 1950 } 1951 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1952 1953 /* read in my part of the matrix column indices */ 1954 nz = procsnz[0]; 1955 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1956 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1957 1958 /* read in every one elses and ship off */ 1959 for (i=1; i<size; i++) { 1960 nz = procsnz[i]; 1961 
ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1962 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1963 } 1964 ierr = PetscFree(cols);CHKERRQ(ierr); 1965 } else { 1966 /* determine buffer space needed for message */ 1967 nz = 0; 1968 for (i=0; i<m; i++) { 1969 nz += ourlens[i]; 1970 } 1971 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1972 1973 /* receive message of column indices*/ 1974 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1975 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1976 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1977 } 1978 1979 /* determine column ownership if matrix is not square */ 1980 if (N != M) { 1981 n = N/size + ((N % size) > rank); 1982 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1983 cstart = cend - n; 1984 } else { 1985 cstart = rstart; 1986 cend = rend; 1987 n = cend - cstart; 1988 } 1989 1990 /* loop over local rows, determining number of off diagonal entries */ 1991 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1992 jj = 0; 1993 for (i=0; i<m; i++) { 1994 for (j=0; j<ourlens[i]; j++) { 1995 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1996 jj++; 1997 } 1998 } 1999 2000 /* create our matrix */ 2001 for (i=0; i<m; i++) { 2002 ourlens[i] -= offlens[i]; 2003 } 2004 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2005 ierr = MatSetType(A,type);CHKERRQ(ierr); 2006 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2007 2008 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2009 for (i=0; i<m; i++) { 2010 ourlens[i] += offlens[i]; 2011 } 2012 2013 if (!rank) { 2014 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2015 2016 /* read in my part of the matrix numerical values */ 2017 nz = procsnz[0]; 2018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2019 2020 /* insert into matrix */ 2021 jj = rstart; 2022 smycols = mycols; 2023 svals = vals; 2024 for (i=0; i<m; i++) { 2025 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2026 smycols += ourlens[i]; 2027 svals += ourlens[i]; 2028 jj++; 2029 } 2030 2031 /* read in other processors and ship out */ 2032 for (i=1; i<size; i++) { 2033 nz = procsnz[i]; 2034 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2035 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2036 } 2037 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2038 } else { 2039 /* receive numeric values */ 2040 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2041 2042 /* receive message of values*/ 2043 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2044 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2045 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2046 2047 /* insert into matrix */ 2048 jj = rstart; 2049 smycols = mycols; 2050 svals = vals; 2051 for (i=0; i<m; i++) { 2052 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2053 smycols += ourlens[i]; 2054 svals += ourlens[i]; 2055 jj++; 2056 } 2057 } 2058 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2059 ierr = PetscFree(vals);CHKERRQ(ierr); 2060 ierr = PetscFree(mycols);CHKERRQ(ierr); 2061 ierr = PetscFree(rowners);CHKERRQ(ierr); 2062 2063 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 *newmat = A; 2066 
PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2071 /* 2072 Not great since it makes two copies of the submatrix, first an SeqAIJ 2073 in local and then by concatenating the local matrices the end result. 2074 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2075 */ 2076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2077 { 2078 PetscErrorCode ierr; 2079 PetscMPIInt rank,size; 2080 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2081 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2082 Mat *local,M,Mreuse; 2083 PetscScalar *vwork,*aa; 2084 MPI_Comm comm = mat->comm; 2085 Mat_SeqAIJ *aij; 2086 2087 2088 PetscFunctionBegin; 2089 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2090 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2091 2092 if (call == MAT_REUSE_MATRIX) { 2093 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2094 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2095 local = &Mreuse; 2096 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2097 } else { 2098 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2099 Mreuse = *local; 2100 ierr = PetscFree(local);CHKERRQ(ierr); 2101 } 2102 2103 /* 2104 m - number of local rows 2105 n - number of columns (same on all processors) 2106 rstart - first row in new global matrix generated 2107 */ 2108 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2109 if (call == MAT_INITIAL_MATRIX) { 2110 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2111 ii = aij->i; 2112 jj = aij->j; 2113 2114 /* 2115 Determine the number of non-zeros in the diagonal and off-diagonal 2116 portions of the matrix in order to do correct preallocation 2117 */ 2118 2119 /* first get start and end of "diagonal" columns */ 2120 if (csize == PETSC_DECIDE) { 2121 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2122 if (mglobal == n) { /* square matrix */ 2123 nlocal = m; 2124 } else { 2125 nlocal = n/size + ((n % size) > rank); 2126 } 2127 } else { 2128 nlocal = csize; 2129 } 2130 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2131 rstart = rend - nlocal; 2132 if (rank == size - 1 && rend != n) { 2133 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2134 } 2135 2136 /* next, compute all the lengths */ 2137 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2138 olens = dlens + m; 2139 for (i=0; i<m; i++) { 2140 jend = ii[i+1] - ii[i]; 2141 olen = 0; 2142 dlen = 0; 2143 for (j=0; j<jend; j++) { 2144 if (*jj < rstart || *jj >= rend) olen++; 2145 else dlen++; 2146 jj++; 2147 } 2148 olens[i] = olen; 2149 dlens[i] = dlen; 2150 } 2151 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2152 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2153 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2154 ierr = PetscFree(dlens);CHKERRQ(ierr); 2155 } else { 2156 PetscInt ml,nl; 2157 2158 M = *newmat; 2159 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2160 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2161 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2162 /* 2163 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2164 rather than the slower MatSetValues(). 
2165 */ 2166 M->was_assembled = PETSC_TRUE; 2167 M->assembled = PETSC_FALSE; 2168 } 2169 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2170 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2171 ii = aij->i; 2172 jj = aij->j; 2173 aa = aij->a; 2174 for (i=0; i<m; i++) { 2175 row = rstart + i; 2176 nz = ii[i+1] - ii[i]; 2177 cwork = jj; jj += nz; 2178 vwork = aa; aa += nz; 2179 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2180 } 2181 2182 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 *newmat = M; 2185 2186 /* save submatrix used in processor for next request */ 2187 if (call == MAT_INITIAL_MATRIX) { 2188 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2189 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2190 } 2191 2192 PetscFunctionReturn(0); 2193 } 2194 2195 EXTERN_C_BEGIN 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2198 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2199 { 2200 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2201 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2202 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2203 const PetscInt *JJ; 2204 PetscScalar *values; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 #if defined(PETSC_OPT_g) 2209 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2210 #endif 2211 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2212 o_nnz = d_nnz + m; 2213 2214 for (i=0; i<m; i++) { 2215 nnz = I[i+1]- I[i]; 2216 JJ = J + I[i]; 2217 nnz_max = PetscMax(nnz_max,nnz); 2218 #if defined(PETSC_OPT_g) 2219 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2220 #endif 2221 for (j=0; j<nnz; j++) { 2222 if (*JJ >= cstart) break; 2223 JJ++; 2224 } 2225 d = 0; 2226 for (; j<nnz; j++) { 2227 if (*JJ++ >= cend) break; 2228 d++; 2229 } 2230 d_nnz[i] = d; 2231 o_nnz[i] = nnz - d; 2232 } 2233 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2234 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2235 2236 if (v) values = (PetscScalar*)v; 2237 else { 2238 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2239 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2240 } 2241 2242 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2243 for (i=0; i<m; i++) { 2244 ii = i + rstart; 2245 nnz = I[i+1]- I[i]; 2246 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2247 } 2248 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2249 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2251 2252 if (!v) { 2253 ierr = PetscFree(values);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 EXTERN_C_END 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2261 /*@C 2262 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2263 (the default parallel PETSc format). 2264 2265 Collective on MPI_Comm 2266 2267 Input Parameters: 2268 + A - the matrix 2269 . i - the indices into j for the start of each local row (starts with zero) 2270 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2271 - v - optional values in the matrix 2272 2273 Level: developer 2274 2275 .keywords: matrix, aij, compressed row, sparse, parallel 2276 2277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2278 @*/ 2279 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2280 { 2281 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2282 2283 PetscFunctionBegin; 2284 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2285 if (f) { 2286 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2287 } 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2293 /*@C 2294 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2295 (the default parallel PETSc format). For good matrix assembly performance 2296 the user should preallocate the matrix storage by setting the parameters 2297 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2298 performance can be increased by more than a factor of 50. 2299 2300 Collective on MPI_Comm 2301 2302 Input Parameters: 2303 + A - the matrix 2304 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2305 (same value is used for all local rows) 2306 . d_nnz - array containing the number of nonzeros in the various rows of the 2307 DIAGONAL portion of the local submatrix (possibly different for each row) 2308 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2309 The size of this array is equal to the number of local rows, i.e 'm'. 2310 You must leave room for the diagonal entry even if it is zero. 2311 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2312 submatrix (same value is used for all local rows). 2313 - o_nnz - array containing the number of nonzeros in the various rows of the 2314 OFF-DIAGONAL portion of the local submatrix (possibly different for 2315 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2316 structure. The size of this array is equal to the number 2317 of local rows, i.e 'm'. 2318 2319 The AIJ format (also called the Yale sparse matrix format or 2320 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2321 storage. The stored row and column indices begin with zero. See the users manual for details. 2322 2323 The parallel matrix is partitioned such that the first m0 rows belong to 2324 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2325 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2326 2327 The DIAGONAL portion of the local submatrix of a processor can be defined 2328 as the submatrix which is obtained by extraction the part corresponding 2329 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2330 first row that belongs to the processor, and r2 is the last row belonging 2331 to the this processor. This is a square mxm matrix. The remaining portion 2332 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2333 2334 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2335 2336 Example usage: 2337 2338 Consider the following 8x8 matrix with 34 non-zero values, that is 2339 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 2340 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2341 as follows: 2342 2343 .vb 2344 1 2 0 | 0 3 0 | 0 4 2345 Proc0 0 5 6 | 7 0 0 | 8 0 2346 9 0 10 | 11 0 0 | 12 0 2347 ------------------------------------- 2348 13 0 14 | 15 16 17 | 0 0 2349 Proc1 0 18 0 | 19 20 21 | 0 0 2350 0 0 0 | 22 23 0 | 24 0 2351 ------------------------------------- 2352 Proc2 25 26 27 | 0 0 28 | 29 0 2353 30 0 0 | 31 32 33 | 0 34 2354 .ve 2355 2356 This can be represented as a collection of submatrices as: 2357 2358 .vb 2359 A B C 2360 D E F 2361 G H I 2362 .ve 2363 2364 Where the submatrices A,B,C are owned by proc0, D,E,F are 2365 owned by proc1, G,H,I are owned by proc2. 2366 2367 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2368 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2369 The 'M','N' parameters are 8,8, and have the same values on all procs. 2370 2371 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2372 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2373 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2374 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2375 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2376 matrix, ans [DF] as another SeqAIJ matrix. 2377 2378 When d_nz, o_nz parameters are specified, d_nz storage elements are 2379 allocated for every row of the local diagonal submatrix, and o_nz 2380 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2381 One way to choose d_nz and o_nz is to use the max nonzerors per local 2382 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2383 In this case, the values of d_nz,o_nz are: 2384 .vb 2385 proc0 : dnz = 2, o_nz = 2 2386 proc1 : dnz = 3, o_nz = 2 2387 proc2 : dnz = 1, o_nz = 4 2388 .ve 2389 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2390 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2391 for proc3. i.e we are using 12+15+10=37 storage locations to store 2392 34 values. 2393 2394 When d_nnz, o_nnz parameters are specified, the storage is specified 2395 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2396 In the above case the values for d_nnz,o_nnz are: 2397 .vb 2398 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2399 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2400 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2401 .ve 2402 Here the space allocated is sum of all the above values i.e 34, and 2403 hence pre-allocation is perfect. 2404 2405 Level: intermediate 2406 2407 .keywords: matrix, aij, compressed row, sparse, parallel 2408 2409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2410 MPIAIJ 2411 @*/ 2412 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2413 { 2414 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2415 2416 PetscFunctionBegin; 2417 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2418 if (f) { 2419 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2420 } 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatCreateMPIAIJ" 2426 /*@C 2427 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2428 (the default parallel PETSc format). 
For good matrix assembly performance 2429 the user should preallocate the matrix storage by setting the parameters 2430 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2431 performance can be increased by more than a factor of 50. 2432 2433 Collective on MPI_Comm 2434 2435 Input Parameters: 2436 + comm - MPI communicator 2437 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2438 This value should be the same as the local size used in creating the 2439 y vector for the matrix-vector product y = Ax. 2440 . n - This value should be the same as the local size used in creating the 2441 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2442 calculated if N is given) For square matrices n is almost always m. 2443 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2444 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2445 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2446 (same value is used for all local rows) 2447 . d_nnz - array containing the number of nonzeros in the various rows of the 2448 DIAGONAL portion of the local submatrix (possibly different for each row) 2449 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2450 The size of this array is equal to the number of local rows, i.e 'm'. 2451 You must leave room for the diagonal entry even if it is zero. 2452 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2453 submatrix (same value is used for all local rows). 2454 - o_nnz - array containing the number of nonzeros in the various rows of the 2455 OFF-DIAGONAL portion of the local submatrix (possibly different for 2456 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2457 structure. The size of this array is equal to the number 2458 of local rows, i.e 'm'. 2459 2460 Output Parameter: 2461 . A - the matrix 2462 2463 Notes: 2464 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2465 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2466 storage requirements for this matrix. 2467 2468 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2469 processor than it must be used on all processors that share the object for 2470 that argument. 2471 2472 The user MUST specify either the local or global matrix dimensions 2473 (possibly both). 2474 2475 The parallel matrix is partitioned such that the first m0 rows belong to 2476 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2477 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2478 2479 The DIAGONAL portion of the local submatrix of a processor can be defined 2480 as the submatrix which is obtained by extraction the part corresponding 2481 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2482 first row that belongs to the processor, and r2 is the last row belonging 2483 to the this processor. This is a square mxm matrix. The remaining portion 2484 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2485 2486 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2487 2488 When calling this routine with a single process communicator, a matrix of 2489 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 2490 type of communicator, use the construction mechanism: 2491 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2492 2493 By default, this format uses inodes (identical nodes) when possible. 2494 We search for consecutive rows with the same nonzero structure, thereby 2495 reusing matrix information to achieve increased efficiency. 2496 2497 Options Database Keys: 2498 + -mat_aij_no_inode - Do not use inodes 2499 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2500 - -mat_aij_oneindex - Internally use indexing starting at 1 2501 rather than 0. Note that when calling MatSetValues(), 2502 the user still MUST index entries starting at 0! 2503 2504 2505 Example usage: 2506 2507 Consider the following 8x8 matrix with 34 non-zero values, that is 2508 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2509 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2510 as follows: 2511 2512 .vb 2513 1 2 0 | 0 3 0 | 0 4 2514 Proc0 0 5 6 | 7 0 0 | 8 0 2515 9 0 10 | 11 0 0 | 12 0 2516 ------------------------------------- 2517 13 0 14 | 15 16 17 | 0 0 2518 Proc1 0 18 0 | 19 20 21 | 0 0 2519 0 0 0 | 22 23 0 | 24 0 2520 ------------------------------------- 2521 Proc2 25 26 27 | 0 0 28 | 29 0 2522 30 0 0 | 31 32 33 | 0 34 2523 .ve 2524 2525 This can be represented as a collection of submatrices as: 2526 2527 .vb 2528 A B C 2529 D E F 2530 G H I 2531 .ve 2532 2533 Where the submatrices A,B,C are owned by proc0, D,E,F are 2534 owned by proc1, G,H,I are owned by proc2. 2535 2536 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2537 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2538 The 'M','N' parameters are 8,8, and have the same values on all procs. 2539 2540 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2541 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2542 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2543 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2544 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2545 matrix, ans [DF] as another SeqAIJ matrix. 2546 2547 When d_nz, o_nz parameters are specified, d_nz storage elements are 2548 allocated for every row of the local diagonal submatrix, and o_nz 2549 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2550 One way to choose d_nz and o_nz is to use the max nonzerors per local 2551 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2552 In this case, the values of d_nz,o_nz are: 2553 .vb 2554 proc0 : dnz = 2, o_nz = 2 2555 proc1 : dnz = 3, o_nz = 2 2556 proc2 : dnz = 1, o_nz = 4 2557 .ve 2558 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2559 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2560 for proc3. i.e we are using 12+15+10=37 storage locations to store 2561 34 values. 2562 2563 When d_nnz, o_nnz parameters are specified, the storage is specified 2564 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2565 In the above case the values for d_nnz,o_nnz are: 2566 .vb 2567 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2568 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2569 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2570 .ve 2571 Here the space allocated is sum of all the above values i.e 34, and 2572 hence pre-allocation is perfect. 
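   A rough calling sketch for proc0 in the example above (the other processes pass their own local sizes and nnz arrays; the ierr declaration and the subsequent MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls are assumed):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve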
2573 2574 Level: intermediate 2575 2576 .keywords: matrix, aij, compressed row, sparse, parallel 2577 2578 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2579 MPIAIJ 2580 @*/ 2581 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2582 { 2583 PetscErrorCode ierr; 2584 PetscMPIInt size; 2585 2586 PetscFunctionBegin; 2587 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2589 if (size > 1) { 2590 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2591 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2592 } else { 2593 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2594 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2595 } 2596 PetscFunctionReturn(0); 2597 } 2598 2599 #undef __FUNCT__ 2600 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2601 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2602 { 2603 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2604 2605 PetscFunctionBegin; 2606 *Ad = a->A; 2607 *Ao = a->B; 2608 *colmap = a->garray; 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2614 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2615 { 2616 PetscErrorCode ierr; 2617 PetscInt i; 2618 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2619 2620 PetscFunctionBegin; 2621 if (coloring->ctype == IS_COLORING_LOCAL) { 2622 ISColoringValue *allcolors,*colors; 2623 ISColoring ocoloring; 2624 2625 /* set coloring for diagonal portion */ 2626 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2627 2628 /* set coloring for off-diagonal portion */ 2629 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2630 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2631 for (i=0; i<a->B->n; i++) { 2632 colors[i] = allcolors[a->garray[i]]; 2633 } 2634 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2635 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2636 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2637 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2638 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2639 ISColoringValue *colors; 2640 PetscInt *larray; 2641 ISColoring ocoloring; 2642 2643 /* set coloring for diagonal portion */ 2644 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2645 for (i=0; i<a->A->n; i++) { 2646 larray[i] = i + a->cstart; 2647 } 2648 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2649 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2650 for (i=0; i<a->A->n; i++) { 2651 colors[i] = coloring->colors[larray[i]]; 2652 } 2653 ierr = PetscFree(larray);CHKERRQ(ierr); 2654 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2655 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2656 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2657 2658 /* set coloring for off-diagonal portion */ 2659 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2660 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2661 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 
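    /* look up the color of each ghost (off-diagonal) column through the local indices computed in larray[] above */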
2662 for (i=0; i<a->B->n; i++) { 2663 colors[i] = coloring->colors[larray[i]]; 2664 } 2665 ierr = PetscFree(larray);CHKERRQ(ierr); 2666 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2667 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2668 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2669 } else { 2670 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2671 } 2672 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2678 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2679 { 2680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2681 PetscErrorCode ierr; 2682 2683 PetscFunctionBegin; 2684 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2685 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2686 PetscFunctionReturn(0); 2687 } 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2691 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2694 PetscErrorCode ierr; 2695 2696 PetscFunctionBegin; 2697 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2698 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2699 PetscFunctionReturn(0); 2700 } 2701 2702 #undef __FUNCT__ 2703 #define __FUNCT__ "MatMerge" 2704 /*@C 2705 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2706 matrices from each processor 2707 2708 Collective on MPI_Comm 2709 2710 Input Parameters: 2711 + comm - the communicator the parallel matrix will live on 2712 . inmat - the input sequential matrix (one per process) 2713 . n - number of local columns (or PETSC_DECIDE) 2714 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2715 2716 Output Parameter: 2717 . outmat - the parallel matrix generated 2718 2719 Level: advanced 2720 2721 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
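   A rough calling sketch (Aseq and Apar are placeholder names; each process is assumed to have already assembled its own sequential matrix Aseq with the same number of columns; note that this routine calls MatDestroy() on the input matrix, so the caller's reference to it is consumed):
.vb
      Mat Aseq,Apar;
      ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve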
2722 2723 @*/ 2724 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2725 { 2726 PetscErrorCode ierr; 2727 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2728 const PetscInt *indx; 2729 const PetscScalar *values; 2730 PetscMap columnmap,rowmap; 2731 2732 PetscFunctionBegin; 2733 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2734 /* 2735 PetscMPIInt rank; 2736 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2737 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2738 */ 2739 if (scall == MAT_INITIAL_MATRIX){ 2740 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2741 if (n == PETSC_DECIDE){ 2742 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2743 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2744 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2745 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2746 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2747 } 2748 2749 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2750 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2751 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2752 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2753 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2754 2755 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2756 for (i=0;i<m;i++) { 2757 ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2758 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2759 ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2760 } 2761 /* This routine will ONLY return MPIAIJ type matrix */ 2762 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2763 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2764 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2765 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2766 2767 } else if (scall == MAT_REUSE_MATRIX){ 2768 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2769 } else { 2770 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2771 } 2772 2773 for (i=0;i<m;i++) { 2774 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2775 I = i + rstart; 2776 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2777 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2778 } 2779 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2780 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2781 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 2783 PetscFunctionReturn(0); 2784 } 2785 2786 #undef __FUNCT__ 2787 #define __FUNCT__ "MatFileSplit" 2788 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2789 { 2790 PetscErrorCode ierr; 2791 PetscMPIInt rank; 2792 PetscInt m,N,i,rstart,nnz; 2793 size_t len; 2794 const PetscInt *indx; 2795 PetscViewer out; 2796 char *name; 2797 Mat B; 2798 const PetscScalar *values; 2799 2800 PetscFunctionBegin; 2801 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2802 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2803 /* Should this be the type of the diagonal block of A? 
*/ 2804 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2805 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2806 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2807 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2808 for (i=0;i<m;i++) { 2809 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2810 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2811 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2812 } 2813 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 2816 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2817 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2818 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2819 sprintf(name,"%s.%d",outfile,rank); 2820 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2821 ierr = PetscFree(name);CHKERRQ(ierr); 2822 ierr = MatView(B,out);CHKERRQ(ierr); 2823 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2824 ierr = MatDestroy(B);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2829 #undef __FUNCT__ 2830 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2831 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2832 { 2833 PetscErrorCode ierr; 2834 Mat_Merge_SeqsToMPI *merge; 2835 PetscObjectContainer container; 2836 2837 PetscFunctionBegin; 2838 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2839 if (container) { 2840 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2841 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2842 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2843 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2844 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2845 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2846 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2847 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2848 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2849 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2850 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2851 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2852 2853 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2854 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2855 /* merge is only valid when the container was found, so free it inside this block */ ierr = PetscFree(merge);CHKERRQ(ierr); 2856 } 2857 2858 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 #include "src/mat/utils/freespace.h" 2863 #include "petscbt.h" 2864 #undef __FUNCT__ 2865 #define __FUNCT__ "MatMerge_SeqsToMPI" 2866 /*@C 2867 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 2868 matrices from each processor 2869 2870 Collective on MPI_Comm 2871 2872 Input Parameters: 2873 + comm - the communicator the parallel matrix will live on 2874 . seqmat - the input sequential matrix (one per process) 2875 . m - number of local rows (or PETSC_DECIDE) 2876 . n - number of local columns (or PETSC_DECIDE) 2877 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2878 2879 Output Parameter: 2880 . mpimat - the parallel matrix generated 2881 2882 Level: advanced 2883 2884 Notes: 2885 The dimensions of the sequential matrix in each processor MUST be the same. 2886 The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be 2887 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
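   The operation is split into a symbolic and a numeric phase (see the routines below); a rough calling sketch, assuming each process has assembled its own sequential matrix Aseq (a placeholder name):
.vb
      Mat Aseq,Ampi;
      ierr = MatMerge_SeqsToMPISymbolic(comm,Aseq,m,n,&Ampi);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(Aseq,Ampi);CHKERRQ(ierr);
.ve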
2888 @*/ 2889 static PetscEvent logkey_seqstompinum = 0; 2890 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2891 { 2892 PetscErrorCode ierr; 2893 MPI_Comm comm=mpimat->comm; 2894 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2895 PetscMPIInt size,rank,taga,*len_s; 2896 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2897 PetscInt proc,m; 2898 PetscInt **buf_ri,**buf_rj; 2899 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2900 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2901 MPI_Request *s_waits,*r_waits; 2902 MPI_Status *status; 2903 MatScalar *aa=a->a,**abuf_r,*ba_i; 2904 Mat_Merge_SeqsToMPI *merge; 2905 PetscObjectContainer container; 2906 2907 PetscFunctionBegin; 2908 if (!logkey_seqstompinum) { 2909 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2910 } 2911 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2912 2913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2914 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2915 2916 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2917 if (container) { 2918 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2919 } 2920 bi = merge->bi; 2921 bj = merge->bj; 2922 buf_ri = merge->buf_ri; 2923 buf_rj = merge->buf_rj; 2924 2925 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2926 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2927 len_s = merge->len_s; 2928 2929 /* send and recv matrix values */ 2930 /*-----------------------------*/ 2931 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2932 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2933 2934 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2935 for (proc=0,k=0; proc<size; proc++){ 2936 if (!len_s[proc]) continue; 2937 i = owners[proc]; 2938 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2939 k++; 2940 } 2941 2942 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2943 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2944 ierr = PetscFree(status);CHKERRQ(ierr); 2945 2946 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2947 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2948 2949 /* insert mat values of mpimat */ 2950 /*----------------------------*/ 2951 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2952 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2953 nextrow = buf_ri_k + merge->nrecv; 2954 nextai = nextrow + merge->nrecv; 2955 2956 for (k=0; k<merge->nrecv; k++){ 2957 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2958 nrows = *(buf_ri_k[k]); 2959 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2960 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2961 } 2962 2963 /* set values of ba */ 2964 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2965 for (i=0; i<m; i++) { 2966 arow = owners[rank] + i; 2967 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2968 bnzi = bi[i+1] - bi[i]; 2969 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2970 2971 /* add local non-zero vals of this proc's seqmat into ba */ 2972 anzi = ai[arow+1] - ai[arow]; 2973 aj = a->j + ai[arow]; 2974 aa = a->a + ai[arow]; 2975 nextaj = 0; 2976 for 
(j=0; nextaj<anzi; j++){ 2977 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2978 ba_i[j] += aa[nextaj++]; 2979 } 2980 } 2981 2982 /* add received vals into ba */ 2983 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2984 /* i-th row */ 2985 if (i == *nextrow[k]) { 2986 anzi = *(nextai[k]+1) - *nextai[k]; 2987 aj = buf_rj[k] + *(nextai[k]); 2988 aa = abuf_r[k] + *(nextai[k]); 2989 nextaj = 0; 2990 for (j=0; nextaj<anzi; j++){ 2991 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2992 ba_i[j] += aa[nextaj++]; 2993 } 2994 } 2995 nextrow[k]++; nextai[k]++; 2996 } 2997 } 2998 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2999 } 3000 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3002 3003 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3004 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3005 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3006 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3007 PetscFunctionReturn(0); 3008 } 3009 static PetscEvent logkey_seqstompisym = 0; 3010 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3011 { 3012 PetscErrorCode ierr; 3013 Mat B_mpi; 3014 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3015 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3016 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3017 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3018 PetscInt len,proc,*dnz,*onz; 3019 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3020 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3021 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3022 MPI_Status *status; 3023 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3024 PetscBT lnkbt; 3025 Mat_Merge_SeqsToMPI *merge; 3026 PetscObjectContainer container; 3027 3028 PetscFunctionBegin; 3029 if (!logkey_seqstompisym) { 3030 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3031 } 3032 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3033 3034 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3035 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3036 3037 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3038 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3039 3040 /* determine row ownership */ 3041 /*---------------------------------------------------------*/ 3042 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3043 if (m == PETSC_DECIDE) { 3044 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3045 } else { 3046 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3047 } 3048 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3049 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3050 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3051 3052 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3053 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3054 3055 /* determine the number of messages to send, their lengths */ 3056 /*---------------------------------------------------------*/ 3057 len_s = merge->len_s; 3058 3059 len = 0; /* length of buf_si[] */ 3060 merge->nsend = 0; 3061 for (proc=0; proc<size; proc++){ 3062 len_si[proc] = 0; 3063 if (proc == rank){ 3064 len_s[proc] = 0; 3065 } else { 3066 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3067 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros (column indices and values) to be sent to [proc] */ 3068 } 3069 if (len_s[proc]) { 3070 merge->nsend++; 3071 nrows = 0; 3072 for (i=owners[proc]; i<owners[proc+1]; i++){ 3073 if (ai[i+1] > ai[i]) nrows++; 3074 } 3075 len_si[proc] = 2*(nrows+1); 3076 len += len_si[proc]; 3077 } 3078 } 3079 3080 /* determine the number and length of messages to receive for ij-structure */ 3081 /*-------------------------------------------------------------------------*/ 3082 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3083 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3084 3085 /* post the Irecv of j-structure */ 3086 /*-------------------------------*/ 3087 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3088 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3089 3090 /* post the Isend of j-structure */ 3091 /*--------------------------------*/ 3092 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3093 sj_waits = si_waits + merge->nsend; 3094 3095 for (proc=0, k=0; proc<size; proc++){ 3096 if (!len_s[proc]) continue; 3097 i = owners[proc]; 3098 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3099 k++; 3100 } 3101 3102 /* receives and sends of j-structure are complete */ 3103 /*------------------------------------------------*/ 3104 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3105 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3106 3107 /* send and recv i-structure */ 3108 /*---------------------------*/ 3109 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3110 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3111 3112 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3113 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3114 for (proc=0,k=0; proc<size; proc++){ 3115 if (!len_s[proc]) continue; 3116 /* form outgoing message for i-structure: 3117 buf_si[0]: number of nonempty rows to be sent 3118 [1:nrows]: row indices, local to the receiving process 3119 [nrows+1:2*nrows+1]: cumulative nonzero counts (i-structure); a small worked example appears at the end of this file 3120 */ 3121 /*-------------------------------------------*/ 3122 nrows = len_si[proc]/2 - 1; 3123 buf_si_i = buf_si + nrows+1; 3124 buf_si[0] = nrows; 3125 buf_si_i[0] = 0; 3126 nrows = 0; 3127 for (i=owners[proc]; i<owners[proc+1]; i++){ 3128 anzi = ai[i+1] - ai[i]; 3129 if (anzi) { 3130 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3131 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3132 nrows++; 3133 } 3134 } 3135 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3136 k++; 3137 buf_si += len_si[proc]; 3138 } 3139 3140 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3141 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3142 3143 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3144 for (i=0; i<merge->nrecv; i++){ 3145 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3146 } 3147 3148 ierr = PetscFree(len_si);CHKERRQ(ierr); 3149 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3150 ierr =
PetscFree(rj_waits);CHKERRQ(ierr); 3151 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3152 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3153 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3154 ierr = PetscFree(status);CHKERRQ(ierr); 3155 3156 /* compute a local seq matrix in each processor */ 3157 /*----------------------------------------------*/ 3158 /* allocate bi array and free space for accumulating nonzero column info */ 3159 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3160 bi[0] = 0; 3161 3162 /* create and initialize a linked list */ 3163 nlnk = N+1; 3164 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3165 3166 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3167 len = 0; 3168 len = ai[owners[rank+1]] - ai[owners[rank]]; 3169 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3170 current_space = free_space; 3171 3172 /* determine symbolic info for each local row */ 3173 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3174 nextrow = buf_ri_k + merge->nrecv; 3175 nextai = nextrow + merge->nrecv; 3176 for (k=0; k<merge->nrecv; k++){ 3177 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3178 nrows = *buf_ri_k[k]; 3179 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3180 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3181 } 3182 3183 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3184 len = 0; 3185 for (i=0;i<m;i++) { 3186 bnzi = 0; 3187 /* add local non-zero cols of this proc's seqmat into lnk */ 3188 arow = owners[rank] + i; 3189 anzi = ai[arow+1] - ai[arow]; 3190 aj = a->j + ai[arow]; 3191 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3192 bnzi += nlnk; 3193 /* add received col data into lnk */ 3194 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3195 if (i == *nextrow[k]) { /* i-th row */ 3196 anzi = *(nextai[k]+1) - *nextai[k]; 3197 aj = buf_rj[k] + *nextai[k]; 3198 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3199 bnzi += nlnk; 3200 nextrow[k]++; nextai[k]++; 3201 } 3202 } 3203 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3204 3205 /* if free space is not available, make more free space */ 3206 if (current_space->local_remaining<bnzi) { 3207 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3208 nspacedouble++; 3209 } 3210 /* copy data into free space, then initialize lnk */ 3211 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3212 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3213 3214 current_space->array += bnzi; 3215 current_space->local_used += bnzi; 3216 current_space->local_remaining -= bnzi; 3217 3218 bi[i+1] = bi[i] + bnzi; 3219 } 3220 3221 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3222 3223 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3224 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3225 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3226 3227 /* create symbolic parallel matrix B_mpi */ 3228 /*---------------------------------------*/ 3229 if (n==PETSC_DECIDE) { 3230 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3231 } else { 3232 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3233 } 3234 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3235 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3236 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3237 3238 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3239 B_mpi->assembled = PETSC_FALSE; 3240 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3241 merge->bi = bi; 3242 merge->bj = bj; 3243 merge->buf_ri = buf_ri; 3244 merge->buf_rj = buf_rj; 3245 merge->coi = PETSC_NULL; 3246 merge->coj = PETSC_NULL; 3247 merge->owners_co = PETSC_NULL; 3248 3249 /* attach the supporting struct to B_mpi for reuse */ 3250 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3251 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3252 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3253 *mpimat = B_mpi; 3254 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3255 PetscFunctionReturn(0); 3256 } 3257 3258 static PetscEvent logkey_seqstompi = 0; 3259 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3260 { 3261 PetscErrorCode ierr; 3262 3263 PetscFunctionBegin; 3264 if (!logkey_seqstompi) { 3265 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr); 3266 } 3267 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3268 if (scall == MAT_INITIAL_MATRIX){ 3269 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3270 } 3271 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3272 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3273 PetscFunctionReturn(0); 3274 } 3275 static PetscEvent logkey_getlocalmat = 0; 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatGetLocalMat" 3278 /*@C 3279 MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows (diagonal and off-diagonal parts) of a parallel MPIAIJ matrix (a usage sketch appears at the end of this file) 3280 3281 Not Collective 3282 3283 Input Parameters: 3284 + A - the matrix 3285 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3286 3287 Output Parameter: 3288 .
A_loc - the local sequential matrix generated 3289 3290 Level: developer 3291 3292 @*/ 3293 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3294 { 3295 PetscErrorCode ierr; 3296 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3297 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3298 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3299 MatScalar *aa=a->a,*ba=b->a,*ca; 3300 PetscInt am=A->m,i,j,k,*nnz,nnz_max,ncols,cstart=mpimat->cstart; 3301 Mat Aw; 3302 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3303 PetscInt stages[2]; 3304 3305 PetscFunctionBegin; 3306 ierr = PetscLogStageRegister(&stages[0],"LocalMat_init");CHKERRQ(ierr); 3307 ierr = PetscLogStageRegister(&stages[1],"LocalMat_reuse");CHKERRQ(ierr); 3308 3309 if (!logkey_getlocalmat) { 3310 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr); 3311 } 3312 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3313 if (scall == MAT_INITIAL_MATRIX){ 3314 ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); 3315 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3316 nnz_max = 0; 3317 for (i=0; i<am; i++){ 3318 nnz[i] = (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3319 if (nnz[i] > nnz_max) nnz_max = nnz[i]; 3320 } 3321 ierr = MatCreate(PETSC_COMM_SELF,am,A->N,am,A->N,&Aw);CHKERRQ(ierr); 3322 ierr = MatSetType(Aw,MATSEQAIJ);CHKERRQ(ierr); 3323 ierr = MatSeqAIJSetPreallocation(Aw,0,nnz);CHKERRQ(ierr); 3324 ierr = MatSetOption(Aw,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 3325 3326 ierr = PetscMalloc((1+nnz_max)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3327 ierr = PetscMalloc((1+nnz_max)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3328 for (i=0; i<am; i++) { 3329 ncols_o = bi[i+1] - bi[i]; 3330 ncols_d = ai[i+1] - ai[i]; 3331 ncols = ncols_d + ncols_o; 3332 k = 0; 3333 /* off-diagonal portion of A with global column < cstart */ 3334 for (jo=0; jo<ncols_o; jo++) { 3335 col = cmap[*bj]; 3336 if (col >= cstart) break; 3337 cj[k] = col; bj++; 3338 ca[k++] = *ba++; 3339 } 3340 /* diagonal portion of A */ 3341 for (j=0; j<ncols_d; j++) { 3342 cj[k] = cstart + *aj++; 3343 ca[k++] = *aa++; 3344 } 3345 /* off-diagonal portion of A with global column >= cend */ 3346 for (j=jo;j<ncols_o; j++) { 3347 cj[k] = cmap[*bj++]; 3348 ca[k++] = *ba++; 3349 } 3350 ierr = MatSetValues_SeqAIJ(Aw,1,&i,ncols,cj,ca,INSERT_VALUES);CHKERRQ(ierr); 3351 } 3352 ierr = MatAssemblyBegin(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3353 ierr = MatAssemblyEnd(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3354 ierr = PetscFree(cj);CHKERRQ(ierr); 3355 ierr = PetscFree(ca);CHKERRQ(ierr); 3356 ierr = PetscFree(nnz);CHKERRQ(ierr); 3357 *A_loc = Aw; 3358 ierr = PetscLogStagePop();CHKERRQ(ierr); 3359 } else if (scall == MAT_REUSE_MATRIX){ 3360 ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr); 3361 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3362 ci = mat->i; cj = mat->j; ca = mat->a; 3363 for (i=0; i<am; i++) { 3364 /* off-diagonal portion of A */ 3365 ncols_o = bi[i+1] - bi[i]; 3366 for (jo=0; jo<ncols_o; jo++) { 3367 col = cmap[*bj]; 3368 if (col >= cstart) break; 3369 *ca++ = *ba++; 3370 } 3371 /* diagonal portion of A */ 3372 ncols = ai[i+1] - ai[i]; 3373 for (j=0; j<ncols; j++) *ca++ = *aa++; 3374 /* off-diagonal portion of A */ 3375 for (j=jo;j<ncols_o; j++) *ca++ = *ba++; 3376 } 3377 ierr = PetscLogStagePop();CHKERRQ(ierr); 3378 } else { 3379 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3380 } 3381 3382 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3383 PetscFunctionReturn(0); 3384 } 3385 3386 static PetscEvent logkey_getlocalmatcondensed = 0; 3387 #undef __FUNCT__ 3388
#define __FUNCT__ "MatGetLocalMatCondensed" 3389 /*@C 3390 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of a parallel MPIAIJ matrix and only its NON-ZERO columns 3391 3392 Not Collective 3393 3394 Input Parameters: 3395 + A - the matrix 3396 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3397 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3398 3399 Output Parameter: 3400 . A_loc - the local sequential matrix generated 3401 3402 Level: developer 3403 3404 @*/ 3405 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3406 { 3407 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3408 PetscErrorCode ierr; 3409 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3410 IS isrowa,iscola; 3411 Mat *aloc; 3412 3413 PetscFunctionBegin; 3414 if (!logkey_getlocalmatcondensed) { 3415 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr); 3416 } 3417 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3418 if (!row){ 3419 start = a->rstart; end = a->rend; 3420 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3421 } else { 3422 isrowa = *row; 3423 } 3424 if (!col){ 3425 start = a->cstart; 3426 cmap = a->garray; 3427 nzA = a->A->n; 3428 nzB = a->B->n; 3429 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3430 ncols = 0; 3431 for (i=0; i<nzB; i++) { 3432 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3433 else break; 3434 } 3435 imark = i; 3436 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3437 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3438 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3439 ierr = PetscFree(idx);CHKERRQ(ierr); 3440 } else { 3441 iscola = *col; 3442 } 3443 if (scall != MAT_INITIAL_MATRIX){ 3444 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3445 aloc[0] = *A_loc; 3446 } 3447 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3448 *A_loc = aloc[0]; 3449 ierr = PetscFree(aloc);CHKERRQ(ierr); 3450 if (!row){ 3451 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3452 } 3453 if (!col){ 3454 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3455 } 3456 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3457 PetscFunctionReturn(0); 3458 } 3459 3460 static PetscEvent logkey_GetBrowsOfAcols = 0; 3461 #undef __FUNCT__ 3462 #define __FUNCT__ "MatGetBrowsOfAcols" 3463 /*@C 3464 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 3465 3466 Collective on Mat 3467 3468 Input Parameters: 3469 + A,B - the matrices in mpiaij format 3470 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3471 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3472 3473 Output Parameters: 3474 + rowb, colb - index sets of rows and columns of B to extract 3475 .
brstart - row index in B_seq at which B's own local rows (the next B->m rows) begin 3476 - B_seq - the sequential matrix generated 3477 3478 Level: developer 3479 3480 @*/ 3481 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3482 { 3483 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3484 PetscErrorCode ierr; 3485 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3486 IS isrowb,iscolb; 3487 Mat *bseq; 3488 3489 PetscFunctionBegin; 3490 if (a->cstart != b->rstart || a->cend != b->rend){ 3491 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3492 } 3493 if (!logkey_GetBrowsOfAcols) { 3494 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr); 3495 } 3496 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3497 3498 if (scall == MAT_INITIAL_MATRIX){ 3499 start = a->cstart; 3500 cmap = a->garray; 3501 nzA = a->A->n; 3502 nzB = a->B->n; 3503 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3504 ncols = 0; 3505 for (i=0; i<nzB; i++) { /* off-diagonal columns of A below the local column range */ 3506 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3507 else break; 3508 } 3509 imark = i; 3510 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3511 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns of A above the local column range */ 3512 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3513 ierr = PetscFree(idx);CHKERRQ(ierr); 3514 *brstart = imark; 3515 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3516 } else { 3517 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3518 isrowb = *rowb; iscolb = *colb; 3519 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3520 bseq[0] = *B_seq; 3521 } 3522 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3523 *B_seq = bseq[0]; 3524 ierr = PetscFree(bseq);CHKERRQ(ierr); 3525 if (!rowb){ 3526 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3527 } else { 3528 *rowb = isrowb; 3529 } 3530 if (!colb){ 3531 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3532 } else { 3533 *colb = iscolb; 3534 } 3535 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 static PetscEvent logkey_GetBrowsOfAocols = 0; 3540 #undef __FUNCT__ 3541 #define __FUNCT__ "MatGetBrowsOfAoCols" 3542 /*@C 3543 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 3544 of the OFF-DIAGONAL portion of the local part of A 3545 3546 Collective on Mat 3547 3548 Input Parameters: 3549 + A,B - the matrices in mpiaij format 3550 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3551 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3552 3553 Output Parameters: 3554 + rowb, colb - index sets of rows and columns of B to extract 3555 . brstart - row index in B_seq at which the rows corresponding to columns of A greater than or equal to cend begin.
3556 - B_seq - the sequential matrix generated 3557 3558 Level: developer 3559 3560 @*/ 3561 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3562 { 3563 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3564 PetscErrorCode ierr; 3565 PetscInt *idx,i,start,ncols,nzB,*cmap; 3566 IS isrowb,iscolb; 3567 Mat *bseq; 3568 3569 PetscFunctionBegin; 3570 if (a->cstart != b->rstart || a->cend != b->rend){ 3571 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3572 } 3573 if (!logkey_GetBrowsOfAocols) { 3574 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr); 3575 } 3576 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3577 3578 if (scall == MAT_INITIAL_MATRIX){ 3579 start = a->cstart; 3580 cmap = a->garray; 3581 nzB = a->B->n; 3582 if (!nzB){ /* output a null matrix */ 3583 *B_seq = PETSC_NULL; 3584 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3585 PetscFunctionReturn(0); 3586 } 3587 ierr = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3588 ncols = 0; 3589 *brstart = nzB; 3590 for (i=0; i<nzB; i++) { /* off-diagonal columns of A below the local column range */ 3591 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3592 else break; 3593 } 3594 *brstart = i; 3595 for (i=*brstart; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns of A above the local column range */ 3596 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3597 ierr = PetscFree(idx);CHKERRQ(ierr); 3598 3599 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3600 } else { 3601 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3602 isrowb = *rowb; iscolb = *colb; 3603 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3604 bseq[0] = *B_seq; 3605 } 3606 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3607 *B_seq = bseq[0]; 3608 ierr = PetscFree(bseq);CHKERRQ(ierr); 3609 if (!rowb){ 3610 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3611 } else { 3612 *rowb = isrowb; 3613 } 3614 if (!colb){ 3615 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3616 } else { 3617 *colb = iscolb; 3618 } 3619 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3620 PetscFunctionReturn(0); 3621 } 3622 3623 /*MC 3624 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3625 3626 Options Database Keys: 3627 .
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3628 3629 Level: beginner 3630 3631 .seealso: MatCreateMPIAIJ 3632 M*/ 3633 3634 EXTERN_C_BEGIN 3635 #undef __FUNCT__ 3636 #define __FUNCT__ "MatCreate_MPIAIJ" 3637 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3638 { 3639 Mat_MPIAIJ *b; 3640 PetscErrorCode ierr; 3641 PetscInt i; 3642 PetscMPIInt size; 3643 3644 PetscFunctionBegin; 3645 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3646 3647 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3648 B->data = (void*)b; 3649 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3650 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3651 B->factor = 0; 3652 B->assembled = PETSC_FALSE; 3653 B->mapping = 0; 3654 3655 B->insertmode = NOT_SET_VALUES; 3656 b->size = size; 3657 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3658 3659 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3660 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3661 3662 /* the information in the maps duplicates the information computed below, eventually 3663 we should remove the duplicate information that is not contained in the maps */ 3664 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3665 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3666 3667 /* build local table of row and column ownerships */ 3668 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3669 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3670 b->cowners = b->rowners + b->size + 2; 3671 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3672 b->rowners[0] = 0; 3673 for (i=2; i<=b->size; i++) { 3674 b->rowners[i] += b->rowners[i-1]; 3675 } 3676 b->rstart = b->rowners[b->rank]; 3677 b->rend = b->rowners[b->rank+1]; 3678 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3679 b->cowners[0] = 0; 3680 for (i=2; i<=b->size; i++) { 3681 b->cowners[i] += b->cowners[i-1]; 3682 } 3683 b->cstart = b->cowners[b->rank]; 3684 b->cend = b->cowners[b->rank+1]; 3685 3686 /* build cache for off array entries formed */ 3687 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3688 b->donotstash = PETSC_FALSE; 3689 b->colmap = 0; 3690 b->garray = 0; 3691 b->roworiented = PETSC_TRUE; 3692 3693 /* stuff used for matrix vector multiply */ 3694 b->lvec = PETSC_NULL; 3695 b->Mvctx = PETSC_NULL; 3696 3697 /* stuff for MatGetRow() */ 3698 b->rowindices = 0; 3699 b->rowvalues = 0; 3700 b->getrowactive = PETSC_FALSE; 3701 3702 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3703 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3704 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3705 PetscLogObjectParent(B,b->A); 3706 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3707 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3708 PetscLogObjectParent(B,b->B); 3709 3710 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3711 "MatStoreValues_MPIAIJ", 3712 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3713 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3714 "MatRetrieveValues_MPIAIJ", 3715 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3716 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3717 "MatGetDiagonalBlock_MPIAIJ", 3718 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3719 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3720 "MatIsTranspose_MPIAIJ", 3721 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3722 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3723 "MatMPIAIJSetPreallocation_MPIAIJ", 3724 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3725 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3726 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3727 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3728 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3729 "MatDiagonalScaleLocal_MPIAIJ", 3730 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3731 PetscFunctionReturn(0); 3732 } 3733 EXTERN_C_END 3734
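/*
   Worked example (added for clarity, derived from the code above) of the i-structure
   message format assembled in MatMerge_SeqsToMPISymbolic().  Suppose a process sends
   two nonempty rows to [proc], with row indices 2 and 5 relative to the receiving
   process, holding 3 and 4 nonzeros respectively.  Then nrows = 2,
   len_si[proc] = 2*(nrows+1) = 6, and the outgoing buffer is

       buf_si = [ 2,   2, 5,   0, 3, 7 ]
                  ^    ^--^    ^-----^
                  |    |       cumulative nonzero counts (the i-structure)
                  |    local row indices of the nonempty rows
                  number of nonempty rows

   The matching j-structure and value messages carry the 7 column indices and the 7
   numerical values of those rows in the same order.
*/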
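/*
   Usage sketch (illustrative only, not part of the original source): merging the
   sequential AIJ matrix owned by each process into a single parallel MPIAIJ matrix
   with the routines defined above.  Aseq is a placeholder for a fully assembled
   MATSEQAIJ matrix on each process; sizes are left to PETSc with PETSC_DECIDE.

     Mat            Aseq,Ampi;
     PetscErrorCode ierr;

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);

     (If Aseq is later refilled with new numerical values but the same nonzero pattern,
      the symbolic data attached to Ampi can be reused:)

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
*/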
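/*
   Usage sketch (illustrative only) for MatGetLocalMat(): extracting the local rows of a
   parallel MPIAIJ matrix A as a sequential matrix, then refreshing the values after A
   has been refilled with the same nonzero pattern.  A is assumed to be an assembled
   MATMPIAIJ matrix.

     Mat A_loc;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     (... use A_loc; later change the values of A without changing its pattern ...)
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/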
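/*
   Usage sketch (illustrative only; the dimensions M, N and the preallocation counts are
   placeholders) for obtaining the "mpiaij" matrix type documented by the MATMPIAIJ
   manual page above, which MatCreate_MPIAIJ() sets up internally as one diagonal and
   one off-diagonal MATSEQAIJ block per process.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);            (or -mat_type mpiaij with MatSetFromOptions())
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     (... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...)
*/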