#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
   at a slightly higher hash-table cost; without it, it is not scalable
   (each process has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

/*
   Inserts or adds (row,col,value) into the diagonal ("A") sequential block;
   if the row has no free slot the row storage is enlarged by CHUNKSIZE entries.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/*
   Same as above, but for the off-diagonal ("B") sequential block.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt
in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 191 PetscScalar value; 192 PetscErrorCode ierr; 193 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 194 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 195 PetscTruth roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 201 PetscScalar *aa = a->a; 202 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 205 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 206 PetscScalar *ba = b->a; 207 208 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 209 PetscInt nonew = a->nonew,shift=0; 210 PetscScalar *ap; 211 212 PetscFunctionBegin; 213 for (i=0; i<m; i++) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_DEBUG) 216 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for (j=0; j<n; j++) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 225 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 226 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 227 } else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_DEBUG) 229 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 230 #endif 231 else { 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 
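  /*
     Editor's note (illustrative sketch, not part of the original source):
     both MatSetValues_MPIAIJ() above and MatGetValues_MPIAIJ() below translate
     a global column index into a local index of the off-diagonal block B
     through aij->colmap, roughly as (gcol/lcol are placeholder names):

   #if defined(PETSC_USE_CTABLE)
         ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
   #else
         lcol = aij->colmap[gcol] - 1;
   #endif

     The +1/-1 shift keeps 0 free to mean "column not present on this process";
     a negative lcol therefore signals a column that is not in the local
     off-diagonal part.
  */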
282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 
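  /*
     Editor's note (assumed typical usage, not part of the original file):
     the stash loop above is driven by the generic assembly interface, e.g.

         ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
         ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
         ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     MatAssemblyBegin_MPIAIJ() ships stashed off-process entries to their
     owners; the loop above drains them with MatStashScatterGetMesg_Private()
     and inserts each run of same-row values with one MatSetValues_MPIAIJ()
     call before the diagonal block aij->A is assembled.
  */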

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* re-access b because aij->B was changed */
      b = (Mat_SeqAIJ*)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
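  /*
     Editor's note (small worked example with hypothetical sizes, not part of
     the original source): with two processes owning rows [0,4) and [4,8), a
     request on rank 0 to zero rows {1,6} is split by the ownership loop above
     into one locally owned row (1) and one row owned by rank 1 (6).  Row 6 is
     packed into svalues and sent below; rank 1 receives it, subtracts its
     base offset (owners[rank] = 4), and zeros its local row 2 in both the
     diagonal and off-diagonal blocks.
  */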
484 for (i=0; i<nrecvs; i++) { 485 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 486 } 487 488 /* do sends: 489 1) starts[i] gives the starting index in svalues for stuff going to 490 the ith processor 491 */ 492 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 493 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 494 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 495 starts[0] = 0; 496 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 497 for (i=0; i<N; i++) { 498 svalues[starts[owner[i]]++] = rows[i]; 499 } 500 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 501 502 starts[0] = 0; 503 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 504 count = 0; 505 for (i=0; i<size; i++) { 506 if (nprocs[2*i+1]) { 507 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 508 } 509 } 510 ierr = PetscFree(starts);CHKERRQ(ierr); 511 512 base = owners[rank]; 513 514 /* wait on receives */ 515 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 516 source = lens + nrecvs; 517 count = nrecvs; slen = 0; 518 while (count) { 519 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 520 /* unpack receives into our local space */ 521 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 522 source[imdex] = recv_status.MPI_SOURCE; 523 lens[imdex] = n; 524 slen += n; 525 count--; 526 } 527 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 528 529 /* move the data into the send scatter */ 530 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 531 count = 0; 532 for (i=0; i<nrecvs; i++) { 533 values = rvalues + i*nmax; 534 for (j=0; j<lens[i]; j++) { 535 lrows[count++] = values[j] - base; 536 } 537 } 538 ierr = PetscFree(rvalues);CHKERRQ(ierr); 539 ierr = PetscFree(lens);CHKERRQ(ierr); 540 ierr = PetscFree(owner);CHKERRQ(ierr); 541 ierr = PetscFree(nprocs);CHKERRQ(ierr); 542 543 /* actually zap the local rows */ 544 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 545 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 546 547 /* 548 Zero the required rows. If the "diagonal block" of the matrix 549 is square and the user wishes to set the diagonal we use seperate 550 code so that MatSetValues() is not called for each diagonal allocating 551 new memory, thus calling lots of mallocs and slowing things down. 
552 553 Contributed by: Mathew Knepley 554 */ 555 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 556 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 557 if (diag && (l->A->M == l->A->N)) { 558 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 559 } else if (diag) { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 562 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 564 } 565 for (i = 0; i < slen; i++) { 566 row = lrows[i] + rstart; 567 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 568 } 569 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 571 } else { 572 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 573 } 574 ierr = ISDestroy(istmp);CHKERRQ(ierr); 575 ierr = PetscFree(lrows);CHKERRQ(ierr); 576 577 /* wait on sends */ 578 if (nsends) { 579 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 580 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 581 ierr = PetscFree(send_status);CHKERRQ(ierr); 582 } 583 ierr = PetscFree(send_waits);CHKERRQ(ierr); 584 ierr = PetscFree(svalues);CHKERRQ(ierr); 585 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMult_MPIAIJ" 591 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 PetscErrorCode ierr; 595 PetscInt nt; 596 597 PetscFunctionBegin; 598 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 599 if (nt != A->n) { 600 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 601 } 602 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 603 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 604 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 605 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNCT__ 610 #define __FUNCT__ "MatMultAdd_MPIAIJ" 611 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 612 { 613 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 614 PetscErrorCode ierr; 615 616 PetscFunctionBegin; 617 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 618 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 619 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 620 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 626 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 627 { 628 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 629 PetscErrorCode ierr; 630 PetscTruth merged; 631 632 PetscFunctionBegin; 633 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 634 /* do nondiagonal part */ 635 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 636 if (!merged) { 637 /* send it on its way */ 638 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 639 /* do local part */ 640 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 641 /* receive remote parts: note this assumes the values are not actually */ 642 /* added in yy until the next line, */ 643 ierr = 
VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart
|| a->rend != a->cend) { 733 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 734 } 735 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatScale_MPIAIJ" 741 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 742 { 743 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 744 PetscErrorCode ierr; 745 746 PetscFunctionBegin; 747 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 748 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatDestroy_MPIAIJ" 754 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 757 PetscErrorCode ierr; 758 759 PetscFunctionBegin; 760 #if defined(PETSC_USE_LOG) 761 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 762 #endif 763 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 764 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 765 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 766 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 767 #if defined (PETSC_USE_CTABLE) 768 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 769 #else 770 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 771 #endif 772 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 773 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 774 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 775 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 776 ierr = PetscFree(aij);CHKERRQ(ierr); 777 778 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 781 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 782 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 783 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 784 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatView_MPIAIJ_Binary" 790 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 791 { 792 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 793 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 794 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 795 PetscErrorCode ierr; 796 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 797 int fd; 798 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 799 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 800 PetscScalar *column_values; 801 802 PetscFunctionBegin; 803 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 804 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 805 nz = A->nz + B->nz; 806 if (!rank) { 807 header[0] = MAT_FILE_COOKIE; 808 header[1] = mat->M; 809 header[2] = mat->N; 810 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 811 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 812 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 813 /* get largest number of rows any processor has */ 814 rlen = mat->m; 815 ierr = 
PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th process needs as much space as the largest any process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr =
MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 904 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 905 } 906 } else { 907 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 908 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 909 } 910 ierr = PetscFree(column_values);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 #undef __FUNCT__ 915 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 916 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 917 { 918 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 919 PetscErrorCode ierr; 920 PetscMPIInt rank = aij->rank,size = aij->size; 921 PetscTruth isdraw,iascii,isbinary; 922 PetscViewer sviewer; 923 PetscViewerFormat format; 924 925 PetscFunctionBegin; 926 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 927 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 928 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 929 if (iascii) { 930 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 931 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 932 MatInfo info; 933 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 934 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 935 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 936 if (!a->inode.size) { 937 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 938 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 939 } else { 940 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 941 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 942 } 943 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 944 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 945 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 947 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 948 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } else if (format == PETSC_VIEWER_ASCII_INFO) { 951 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 952 if (a->inode.size) { 953 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr); 954 } else { 955 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 956 } 957 PetscFunctionReturn(0); 958 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 959 PetscFunctionReturn(0); 960 } 961 } else if (isbinary) { 962 if (size == 1) { 963 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 964 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 965 } else { 966 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 967 } 968 PetscFunctionReturn(0); 969 } else if (isdraw) { 970 PetscDraw draw; 971 PetscTruth isnull; 972 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 973 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 974 } 975 976 if (size == 1) { 977 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 978 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 979 } else { 980 /* assemble the entire matrix onto first processor. */ 981 Mat A; 982 Mat_SeqAIJ *Aloc; 983 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 984 PetscScalar *a; 985 986 if (!rank) { 987 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 988 } else { 989 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 990 } 991 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 992 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 993 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 994 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 995 996 /* copy over the A part */ 997 Aloc = (Mat_SeqAIJ*)aij->A->data; 998 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 999 row = aij->rstart; 1000 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 1001 for (i=0; i<m; i++) { 1002 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1003 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1004 } 1005 aj = Aloc->j; 1006 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 1007 1008 /* copy over the B part */ 1009 Aloc = (Mat_SeqAIJ*)aij->B->data; 1010 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1011 row = aij->rstart; 1012 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1013 ct = cols; 1014 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1015 for (i=0; i<m; i++) { 1016 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1017 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1018 } 1019 ierr = PetscFree(ct);CHKERRQ(ierr); 1020 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1021 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1022 /* 1023 Everyone has to call to draw the matrix since the graphics waits are 1024 synchronized across all processors that share the PetscDraw object 1025 */ 1026 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1027 if (!rank) { 1028 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1029 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1030 } 1031 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1032 ierr = MatDestroy(A);CHKERRQ(ierr); 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatView_MPIAIJ" 1039 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1040 { 1041 PetscErrorCode ierr; 1042 PetscTruth iascii,isdraw,issocket,isbinary; 1043 1044 PetscFunctionBegin; 1045 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1046 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1047 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1048 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1049 if (iascii || isdraw || isbinary || issocket) { 1050 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1051 } else { 1052 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1053 } 1054 PetscFunctionReturn(0); 1055 } 1056 1057 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "MatRelax_MPIAIJ" 1061 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1062 { 
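  /*
     Editor's note (sketch of the scheme below, using the same Mat_MPIAIJ
     fields; not original text): each "local" sweep folds the off-process
     coupling into the right-hand side and then relaxes only the diagonal
     block:

         scatter x into the ghost values      (mat->lvec)
         bb1 = bb - B * x_ghost               (VecScale by -1, then MatMultAdd)
         SOR sweep on  A * x = bb1            (forward, backward, or symmetric)

     repeated `its` times with `lits` inner iterations on the diagonal block.
  */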
1063 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1064 PetscErrorCode ierr; 1065 Vec bb1; 1066 PetscScalar mone=-1.0; 1067 1068 PetscFunctionBegin; 1069 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1070 1071 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1072 1073 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1074 if (flag & SOR_ZERO_INITIAL_GUESS) { 1075 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1076 its--; 1077 } 1078 1079 while (its--) { 1080 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1081 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1082 1083 /* update rhs: bb1 = bb - B*x */ 1084 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1085 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1086 1087 /* local sweep */ 1088 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1089 CHKERRQ(ierr); 1090 } 1091 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1092 if (flag & SOR_ZERO_INITIAL_GUESS) { 1093 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1094 its--; 1095 } 1096 while (its--) { 1097 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1098 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1099 1100 /* update rhs: bb1 = bb - B*x */ 1101 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1102 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1103 1104 /* local sweep */ 1105 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1106 CHKERRQ(ierr); 1107 } 1108 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1109 if (flag & SOR_ZERO_INITIAL_GUESS) { 1110 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1111 its--; 1112 } 1113 while (its--) { 1114 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1116 1117 /* update rhs: bb1 = bb - B*x */ 1118 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1119 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1120 1121 /* local sweep */ 1122 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1123 CHKERRQ(ierr); 1124 } 1125 } else { 1126 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1127 } 1128 1129 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1135 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1136 { 1137 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1138 Mat A = mat->A,B = mat->B; 1139 PetscErrorCode ierr; 1140 PetscReal isend[5],irecv[5]; 1141 1142 PetscFunctionBegin; 1143 info->block_size = 1.0; 1144 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1145 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1146 isend[3] = info->memory; isend[4] = info->mallocs; 1147 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1148 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1149 isend[3] += info->memory; isend[4] += info->mallocs; 1150 if (flag 
== MAT_LOCAL) { 1151 info->nz_used = isend[0]; 1152 info->nz_allocated = isend[1]; 1153 info->nz_unneeded = isend[2]; 1154 info->memory = isend[3]; 1155 info->mallocs = isend[4]; 1156 } else if (flag == MAT_GLOBAL_MAX) { 1157 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1158 info->nz_used = irecv[0]; 1159 info->nz_allocated = irecv[1]; 1160 info->nz_unneeded = irecv[2]; 1161 info->memory = irecv[3]; 1162 info->mallocs = irecv[4]; 1163 } else if (flag == MAT_GLOBAL_SUM) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } 1171 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1172 info->fill_ratio_needed = 0; 1173 info->factor_mallocs = 0; 1174 info->rows_global = (double)matin->M; 1175 info->columns_global = (double)matin->N; 1176 info->rows_local = (double)matin->m; 1177 info->columns_local = (double)matin->N; 1178 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatSetOption_MPIAIJ" 1184 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1185 { 1186 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 switch (op) { 1191 case MAT_NO_NEW_NONZERO_LOCATIONS: 1192 case MAT_YES_NEW_NONZERO_LOCATIONS: 1193 case MAT_COLUMNS_UNSORTED: 1194 case MAT_COLUMNS_SORTED: 1195 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1196 case MAT_KEEP_ZEROED_ROWS: 1197 case MAT_NEW_NONZERO_LOCATION_ERR: 1198 case MAT_USE_INODES: 1199 case MAT_DO_NOT_USE_INODES: 1200 case MAT_IGNORE_ZERO_ENTRIES: 1201 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1203 break; 1204 case MAT_ROW_ORIENTED: 1205 a->roworiented = PETSC_TRUE; 1206 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1208 break; 1209 case MAT_ROWS_SORTED: 1210 case MAT_ROWS_UNSORTED: 1211 case MAT_YES_NEW_DIAGONALS: 1212 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1213 break; 1214 case MAT_COLUMN_ORIENTED: 1215 a->roworiented = PETSC_FALSE; 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1218 break; 1219 case MAT_IGNORE_OFF_PROC_ENTRIES: 1220 a->donotstash = PETSC_TRUE; 1221 break; 1222 case MAT_NO_NEW_DIAGONALS: 1223 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1224 case MAT_SYMMETRIC: 1225 case MAT_STRUCTURALLY_SYMMETRIC: 1226 case MAT_HERMITIAN: 1227 case MAT_SYMMETRY_ETERNAL: 1228 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1229 break; 1230 case MAT_NOT_SYMMETRIC: 1231 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1232 case MAT_NOT_HERMITIAN: 1233 case MAT_NOT_SYMMETRY_ETERNAL: 1234 break; 1235 default: 1236 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1237 } 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetRow_MPIAIJ" 1243 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1244 { 1245 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1246 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1247 PetscErrorCode ierr; 1248 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1249 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1250 PetscInt *cmap,*idx_p; 1251 1252 PetscFunctionBegin; 1253 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1254 mat->getrowactive = 
PETSC_TRUE; 1255 1256 if (!mat->rowvalues && (idx || v)) { 1257 /* 1258 allocate enough space to hold information from the longest row. 1259 */ 1260 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1261 PetscInt max = 1,tmp; 1262 for (i=0; i<matin->m; i++) { 1263 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1264 if (max < tmp) { max = tmp; } 1265 } 1266 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1267 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1268 } 1269 1270 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1271 lrow = row - rstart; 1272 1273 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1274 if (!v) {pvA = 0; pvB = 0;} 1275 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1276 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1277 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1278 nztot = nzA + nzB; 1279 1280 cmap = mat->garray; 1281 if (v || idx) { 1282 if (nztot) { 1283 /* Sort by increasing column numbers, assuming A and B already sorted */ 1284 PetscInt imark = -1; 1285 if (v) { 1286 *v = v_p = mat->rowvalues; 1287 for (i=0; i<nzB; i++) { 1288 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1289 else break; 1290 } 1291 imark = i; 1292 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1293 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1294 } 1295 if (idx) { 1296 *idx = idx_p = mat->rowindices; 1297 if (imark > -1) { 1298 for (i=0; i<imark; i++) { 1299 idx_p[i] = cmap[cworkB[i]]; 1300 } 1301 } else { 1302 for (i=0; i<nzB; i++) { 1303 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1304 else break; 1305 } 1306 imark = i; 1307 } 1308 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1309 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1310 } 1311 } else { 1312 if (idx) *idx = 0; 1313 if (v) *v = 0; 1314 } 1315 } 1316 *nz = nztot; 1317 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1318 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1324 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1325 { 1326 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1327 1328 PetscFunctionBegin; 1329 if (!aij->getrowactive) { 1330 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1331 } 1332 aij->getrowactive = PETSC_FALSE; 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatNorm_MPIAIJ" 1338 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1339 { 1340 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1341 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1342 PetscErrorCode ierr; 1343 PetscInt i,j,cstart = aij->cstart; 1344 PetscReal sum = 0.0; 1345 PetscScalar *v; 1346 1347 PetscFunctionBegin; 1348 if (aij->size == 1) { 1349 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1350 } else { 1351 if (type == NORM_FROBENIUS) { 1352 v = amat->a; 1353 for (i=0; i<amat->nz; i++) { 1354 #if defined(PETSC_USE_COMPLEX) 1355 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1356 #else 1357 sum += (*v)*(*v); v++; 1358 #endif 1359 } 1360 v = bmat->a; 1361 for (i=0; i<bmat->nz; i++) { 1362 #if defined(PETSC_USE_COMPLEX) 1363 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1364 #else 1365 sum += (*v)*(*v); v++; 
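          /* Editor's note: together the two loops accumulate
             sum = SUM_ij |a_ij|^2 over the diagonal (amat) and off-diagonal
             (bmat) blocks; the MPI_Allreduce and sqrt below then give the
             global Frobenius norm ||A||_F = sqrt(SUM_ij |a_ij|^2). */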
1366 #endif 1367 } 1368 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1369 *norm = sqrt(*norm); 1370 } else if (type == NORM_1) { /* max column norm */ 1371 PetscReal *tmp,*tmp2; 1372 PetscInt *jj,*garray = aij->garray; 1373 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1374 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1375 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1376 *norm = 0.0; 1377 v = amat->a; jj = amat->j; 1378 for (j=0; j<amat->nz; j++) { 1379 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1380 } 1381 v = bmat->a; jj = bmat->j; 1382 for (j=0; j<bmat->nz; j++) { 1383 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1384 } 1385 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1386 for (j=0; j<mat->N; j++) { 1387 if (tmp2[j] > *norm) *norm = tmp2[j]; 1388 } 1389 ierr = PetscFree(tmp);CHKERRQ(ierr); 1390 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1391 } else if (type == NORM_INFINITY) { /* max row norm */ 1392 PetscReal ntemp = 0.0; 1393 for (j=0; j<aij->A->m; j++) { 1394 v = amat->a + amat->i[j]; 1395 sum = 0.0; 1396 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1397 sum += PetscAbsScalar(*v); v++; 1398 } 1399 v = bmat->a + bmat->i[j]; 1400 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1401 sum += PetscAbsScalar(*v); v++; 1402 } 1403 if (sum > ntemp) ntemp = sum; 1404 } 1405 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1406 } else { 1407 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1408 } 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatTranspose_MPIAIJ" 1415 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1416 { 1417 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1418 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1419 PetscErrorCode ierr; 1420 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1421 Mat B; 1422 PetscScalar *array; 1423 1424 PetscFunctionBegin; 1425 if (!matout && M != N) { 1426 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1427 } 1428 1429 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1430 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1431 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1432 1433 /* copy over the A part */ 1434 Aloc = (Mat_SeqAIJ*)a->A->data; 1435 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1436 row = a->rstart; 1437 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1438 for (i=0; i<m; i++) { 1439 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1440 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1441 } 1442 aj = Aloc->j; 1443 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1444 1445 /* copy over the B part */ 1446 Aloc = (Mat_SeqAIJ*)a->B->data; 1447 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1448 row = a->rstart; 1449 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1450 ct = cols; 1451 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1452 for (i=0; i<m; i++) { 1453 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1454 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1455 } 1456 ierr = PetscFree(ct);CHKERRQ(ierr); 1457 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1458 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1459 if (matout) { 1460 *matout = B; 1461 } else { 1462 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1463 } 1464 
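  /*
     Editor's note (summary of the routine above, not original text): the
     transpose is built by inserting each local row of A as a column of B,
     i.e. for every local row a call of the form (ncols is a stand-in for
     ai[i+1]-ai[i])

         ierr = MatSetValues(B,ncols,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);

     with global column numbers taken from a->cstart (diagonal part) or
     a->garray[] (off-diagonal part).  In-place transposition (matout ==
     PETSC_NULL) reuses the header of A via MatHeaderCopy().
  */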
PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1469 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1470 { 1471 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1472 Mat a = aij->A,b = aij->B; 1473 PetscErrorCode ierr; 1474 PetscInt s1,s2,s3; 1475 1476 PetscFunctionBegin; 1477 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1478 if (rr) { 1479 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1480 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1481 /* Overlap communication with computation. */ 1482 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1483 } 1484 if (ll) { 1485 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1486 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1487 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1488 } 1489 /* scale the diagonal block */ 1490 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1491 1492 if (rr) { 1493 /* Do a scatter end and then right scale the off-diagonal block */ 1494 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1495 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1496 } 1497 1498 PetscFunctionReturn(0); 1499 } 1500 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1504 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1505 { 1506 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 if (!a->rank) { 1511 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1518 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1519 { 1520 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1525 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1530 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1531 { 1532 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1533 PetscErrorCode ierr; 1534 1535 PetscFunctionBegin; 1536 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatEqual_MPIAIJ" 1542 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1543 { 1544 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1545 Mat a,b,c,d; 1546 PetscTruth flg; 1547 PetscErrorCode ierr; 1548 1549 PetscFunctionBegin; 1550 a = matA->A; b = matA->B; 1551 c = matB->A; d = matB->B; 1552 1553 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1554 if (flg) { 1555 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1556 } 1557 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatCopy_MPIAIJ" 1563 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1564 { 1565 PetscErrorCode ierr; 1566 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1567 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1568 1569 PetscFunctionBegin; 1570 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1571 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1572 /* because of the column compression in the off-processor part of the matrix a->B, 1573 the number of columns in a->B and b->B may be different, hence we cannot call 1574 the MatCopy() directly on the two parts. If need be, we can provide a more 1575 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1576 then copying the submatrices */ 1577 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1578 } else { 1579 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1580 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1587 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1588 { 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #include "petscblaslapack.h" 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatAXPY_MPIAIJ" 1599 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1600 { 1601 PetscErrorCode ierr; 1602 PetscInt i; 1603 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1604 PetscBLASInt bnz,one=1; 1605 Mat_SeqAIJ *x,*y; 1606 1607 PetscFunctionBegin; 1608 if (str == SAME_NONZERO_PATTERN) { 1609 x = (Mat_SeqAIJ *)xx->A->data; 1610 y = (Mat_SeqAIJ *)yy->A->data; 1611 bnz = (PetscBLASInt)x->nz; 1612 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1613 x = (Mat_SeqAIJ *)xx->B->data; 1614 y = (Mat_SeqAIJ *)yy->B->data; 1615 bnz = (PetscBLASInt)x->nz; 1616 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1617 } else if (str == SUBSET_NONZERO_PATTERN) { 1618 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1619 1620 x = (Mat_SeqAIJ *)xx->B->data; 1621 y = (Mat_SeqAIJ *)yy->B->data; 1622 if (y->xtoy && y->XtoY != xx->B) { 1623 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1624 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1625 } 1626 if (!y->xtoy) { /* get xtoy */ 1627 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1628 y->XtoY = xx->B; 1629 } 1630 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1631 } else { 1632 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 /* -------------------------------------------------------------------*/ 1638 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1639 MatGetRow_MPIAIJ, 1640 MatRestoreRow_MPIAIJ, 1641 MatMult_MPIAIJ, 1642 /* 4*/ MatMultAdd_MPIAIJ, 1643 MatMultTranspose_MPIAIJ, 1644 MatMultTransposeAdd_MPIAIJ, 1645 0, 1646 0, 1647 0, 1648 /*10*/ 0, 1649 0, 1650 0, 1651 MatRelax_MPIAIJ, 1652 MatTranspose_MPIAIJ, 1653 /*15*/ MatGetInfo_MPIAIJ, 1654 MatEqual_MPIAIJ, 1655 MatGetDiagonal_MPIAIJ, 1656 MatDiagonalScale_MPIAIJ, 1657 MatNorm_MPIAIJ, 1658 /*20*/ MatAssemblyBegin_MPIAIJ, 1659 MatAssemblyEnd_MPIAIJ, 1660 0, 1661 MatSetOption_MPIAIJ, 1662 MatZeroEntries_MPIAIJ, 1663 /*25*/ MatZeroRows_MPIAIJ, 1664 0, 1665 0, 1666 0, 1667 0, 1668 /*30*/ MatSetUpPreallocation_MPIAIJ, 1669 0, 1670 0, 1671 0, 1672 0, 1673 /*35*/ MatDuplicate_MPIAIJ, 1674 0, 1675 0, 1676 0, 1677 0, 1678 /*40*/ MatAXPY_MPIAIJ, 1679 MatGetSubMatrices_MPIAIJ, 1680 MatIncreaseOverlap_MPIAIJ, 1681 MatGetValues_MPIAIJ, 1682 MatCopy_MPIAIJ, 1683 /*45*/ MatPrintHelp_MPIAIJ, 1684 MatScale_MPIAIJ, 1685 0, 1686 0, 1687 0, 1688 /*50*/ MatSetBlockSize_MPIAIJ, 1689 
0, 1690 0, 1691 0, 1692 0, 1693 /*55*/ MatFDColoringCreate_MPIAIJ, 1694 0, 1695 MatSetUnfactored_MPIAIJ, 1696 0, 1697 0, 1698 /*60*/ MatGetSubMatrix_MPIAIJ, 1699 MatDestroy_MPIAIJ, 1700 MatView_MPIAIJ, 1701 MatGetPetscMaps_Petsc, 1702 0, 1703 /*65*/ 0, 1704 0, 1705 0, 1706 0, 1707 0, 1708 /*70*/ 0, 1709 0, 1710 MatSetColoring_MPIAIJ, 1711 #if defined(PETSC_HAVE_ADIC) 1712 MatSetValuesAdic_MPIAIJ, 1713 #else 1714 0, 1715 #endif 1716 MatSetValuesAdifor_MPIAIJ, 1717 /*75*/ 0, 1718 0, 1719 0, 1720 0, 1721 0, 1722 /*80*/ 0, 1723 0, 1724 0, 1725 0, 1726 /*84*/ MatLoad_MPIAIJ, 1727 0, 1728 0, 1729 0, 1730 0, 1731 0, 1732 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1733 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1734 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1735 MatPtAP_Basic, 1736 MatPtAPSymbolic_MPIAIJ, 1737 /*95*/ MatPtAPNumeric_MPIAIJ, 1738 0, 1739 0, 1740 0, 1741 0, 1742 /*100*/0, 1743 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1744 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1745 }; 1746 1747 /* ----------------------------------------------------------------------------------------*/ 1748 1749 EXTERN_C_BEGIN 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1752 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1753 { 1754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1759 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 EXTERN_C_END 1763 1764 EXTERN_C_BEGIN 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1767 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1768 { 1769 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1774 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 EXTERN_C_END 1778 1779 #include "petscpc.h" 1780 EXTERN_C_BEGIN 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1783 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1784 { 1785 Mat_MPIAIJ *b; 1786 PetscErrorCode ierr; 1787 PetscInt i; 1788 1789 PetscFunctionBegin; 1790 B->preallocated = PETSC_TRUE; 1791 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1792 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1793 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1794 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1795 if (d_nnz) { 1796 for (i=0; i<B->m; i++) { 1797 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1798 } 1799 } 1800 if (o_nnz) { 1801 for (i=0; i<B->m; i++) { 1802 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1803 } 1804 } 1805 b = (Mat_MPIAIJ*)B->data; 1806 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1807 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1808 1809 PetscFunctionReturn(0); 1810 } 1811 EXTERN_C_END 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1815 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1816 { 1817 Mat mat; 1818 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 *newmat = 0; 1823 ierr = 
MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1824 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1825 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1826 a = (Mat_MPIAIJ*)mat->data; 1827 1828 mat->factor = matin->factor; 1829 mat->bs = matin->bs; 1830 mat->assembled = PETSC_TRUE; 1831 mat->insertmode = NOT_SET_VALUES; 1832 mat->preallocated = PETSC_TRUE; 1833 1834 a->rstart = oldmat->rstart; 1835 a->rend = oldmat->rend; 1836 a->cstart = oldmat->cstart; 1837 a->cend = oldmat->cend; 1838 a->size = oldmat->size; 1839 a->rank = oldmat->rank; 1840 a->donotstash = oldmat->donotstash; 1841 a->roworiented = oldmat->roworiented; 1842 a->rowindices = 0; 1843 a->rowvalues = 0; 1844 a->getrowactive = PETSC_FALSE; 1845 1846 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1847 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1848 if (oldmat->colmap) { 1849 #if defined (PETSC_USE_CTABLE) 1850 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1851 #else 1852 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1853 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1854 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1855 #endif 1856 } else a->colmap = 0; 1857 if (oldmat->garray) { 1858 PetscInt len; 1859 len = oldmat->B->n; 1860 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1861 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1862 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1863 } else a->garray = 0; 1864 1865 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1866 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1867 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1868 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1869 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1870 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1871 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1872 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1873 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1874 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1875 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1876 *newmat = mat; 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #include "petscsys.h" 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatLoad_MPIAIJ" 1884 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1885 { 1886 Mat A; 1887 PetscScalar *vals,*svals; 1888 MPI_Comm comm = ((PetscObject)viewer)->comm; 1889 MPI_Status status; 1890 PetscErrorCode ierr; 1891 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1892 PetscInt i,nz,j,rstart,rend,mmax; 1893 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1894 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1895 PetscInt cend,cstart,n,*rowners; 1896 int fd; 1897 1898 PetscFunctionBegin; 1899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1900 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1901 if (!rank) { 1902 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1903 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1904 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1905 } 1906 1907 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1908 
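/* header[1] and header[2], broadcast above, hold the global number of rows and columns used below; the remaining header entry is not referenced by this loader */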
M = header[1]; N = header[2]; 1909 /* determine ownership of all rows */ 1910 m = M/size + ((M % size) > rank); 1911 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1912 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1913 1914 /* First process needs enough room for process with most rows */ 1915 if (!rank) { 1916 mmax = rowners[1]; 1917 for (i=2; i<size; i++) { 1918 mmax = PetscMax(mmax,rowners[i]); 1919 } 1920 } else mmax = m; 1921 1922 rowners[0] = 0; 1923 for (i=2; i<=size; i++) { 1924 mmax = PetscMax(mmax,rowners[i]); 1925 rowners[i] += rowners[i-1]; 1926 } 1927 rstart = rowners[rank]; 1928 rend = rowners[rank+1]; 1929 1930 /* distribute row lengths to all processors */ 1931 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1932 if (!rank) { 1933 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1934 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1935 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1936 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1937 for (j=0; j<m; j++) { 1938 procsnz[0] += ourlens[j]; 1939 } 1940 for (i=1; i<size; i++) { 1941 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1942 /* calculate the number of nonzeros on each processor */ 1943 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1944 procsnz[i] += rowlengths[j]; 1945 } 1946 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1947 } 1948 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1949 } else { 1950 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1951 } 1952 1953 if (!rank) { 1954 /* determine max buffer needed and allocate it */ 1955 maxnz = 0; 1956 for (i=0; i<size; i++) { 1957 maxnz = PetscMax(maxnz,procsnz[i]); 1958 } 1959 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1960 1961 /* read in my part of the matrix column indices */ 1962 nz = procsnz[0]; 1963 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1964 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1965 1966 /* read in every one elses and ship off */ 1967 for (i=1; i<size; i++) { 1968 nz = procsnz[i]; 1969 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1970 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1971 } 1972 ierr = PetscFree(cols);CHKERRQ(ierr); 1973 } else { 1974 /* determine buffer space needed for message */ 1975 nz = 0; 1976 for (i=0; i<m; i++) { 1977 nz += ourlens[i]; 1978 } 1979 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1980 1981 /* receive message of column indices*/ 1982 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1983 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1984 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1985 } 1986 1987 /* determine column ownership if matrix is not square */ 1988 if (N != M) { 1989 n = N/size + ((N % size) > rank); 1990 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1991 cstart = cend - n; 1992 } else { 1993 cstart = rstart; 1994 cend = rend; 1995 n = cend - cstart; 1996 } 1997 1998 /* loop over local rows, determining number of off diagonal entries */ 1999 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2000 jj = 0; 2001 for (i=0; i<m; i++) { 2002 for (j=0; j<ourlens[i]; j++) { 2003 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2004 jj++; 2005 } 
2006 } 2007 2008 /* create our matrix */ 2009 for (i=0; i<m; i++) { 2010 ourlens[i] -= offlens[i]; 2011 } 2012 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2013 ierr = MatSetType(A,type);CHKERRQ(ierr); 2014 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2015 2016 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2017 for (i=0; i<m; i++) { 2018 ourlens[i] += offlens[i]; 2019 } 2020 2021 if (!rank) { 2022 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2023 2024 /* read in my part of the matrix numerical values */ 2025 nz = procsnz[0]; 2026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2027 2028 /* insert into matrix */ 2029 jj = rstart; 2030 smycols = mycols; 2031 svals = vals; 2032 for (i=0; i<m; i++) { 2033 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2034 smycols += ourlens[i]; 2035 svals += ourlens[i]; 2036 jj++; 2037 } 2038 2039 /* read in other processors and ship out */ 2040 for (i=1; i<size; i++) { 2041 nz = procsnz[i]; 2042 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2043 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2044 } 2045 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2046 } else { 2047 /* receive numeric values */ 2048 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2049 2050 /* receive message of values*/ 2051 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2052 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2053 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2054 2055 /* insert into matrix */ 2056 jj = rstart; 2057 smycols = mycols; 2058 svals = vals; 2059 for (i=0; i<m; i++) { 2060 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2061 smycols += ourlens[i]; 2062 svals += ourlens[i]; 2063 jj++; 2064 } 2065 } 2066 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2067 ierr = PetscFree(vals);CHKERRQ(ierr); 2068 ierr = PetscFree(mycols);CHKERRQ(ierr); 2069 ierr = PetscFree(rowners);CHKERRQ(ierr); 2070 2071 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2072 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 *newmat = A; 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2079 /* 2080 Not great since it makes two copies of the submatrix, first an SeqAIJ 2081 in local and then by concatenating the local matrices the end result. 
2082 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2083 */ 2084 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2085 { 2086 PetscErrorCode ierr; 2087 PetscMPIInt rank,size; 2088 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2089 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2090 Mat *local,M,Mreuse; 2091 PetscScalar *vwork,*aa; 2092 MPI_Comm comm = mat->comm; 2093 Mat_SeqAIJ *aij; 2094 2095 2096 PetscFunctionBegin; 2097 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2098 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2099 2100 if (call == MAT_REUSE_MATRIX) { 2101 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2102 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2103 local = &Mreuse; 2104 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2105 } else { 2106 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2107 Mreuse = *local; 2108 ierr = PetscFree(local);CHKERRQ(ierr); 2109 } 2110 2111 /* 2112 m - number of local rows 2113 n - number of columns (same on all processors) 2114 rstart - first row in new global matrix generated 2115 */ 2116 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2117 if (call == MAT_INITIAL_MATRIX) { 2118 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2119 ii = aij->i; 2120 jj = aij->j; 2121 2122 /* 2123 Determine the number of non-zeros in the diagonal and off-diagonal 2124 portions of the matrix in order to do correct preallocation 2125 */ 2126 2127 /* first get start and end of "diagonal" columns */ 2128 if (csize == PETSC_DECIDE) { 2129 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2130 if (mglobal == n) { /* square matrix */ 2131 nlocal = m; 2132 } else { 2133 nlocal = n/size + ((n % size) > rank); 2134 } 2135 } else { 2136 nlocal = csize; 2137 } 2138 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2139 rstart = rend - nlocal; 2140 if (rank == size - 1 && rend != n) { 2141 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2142 } 2143 2144 /* next, compute all the lengths */ 2145 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2146 olens = dlens + m; 2147 for (i=0; i<m; i++) { 2148 jend = ii[i+1] - ii[i]; 2149 olen = 0; 2150 dlen = 0; 2151 for (j=0; j<jend; j++) { 2152 if (*jj < rstart || *jj >= rend) olen++; 2153 else dlen++; 2154 jj++; 2155 } 2156 olens[i] = olen; 2157 dlens[i] = dlen; 2158 } 2159 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2160 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2161 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2162 ierr = PetscFree(dlens);CHKERRQ(ierr); 2163 } else { 2164 PetscInt ml,nl; 2165 2166 M = *newmat; 2167 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2168 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2169 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2170 /* 2171 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2172 rather than the slower MatSetValues(). 
2173 */ 2174 M->was_assembled = PETSC_TRUE; 2175 M->assembled = PETSC_FALSE; 2176 } 2177 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2178 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2179 ii = aij->i; 2180 jj = aij->j; 2181 aa = aij->a; 2182 for (i=0; i<m; i++) { 2183 row = rstart + i; 2184 nz = ii[i+1] - ii[i]; 2185 cwork = jj; jj += nz; 2186 vwork = aa; aa += nz; 2187 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2188 } 2189 2190 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2191 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2192 *newmat = M; 2193 2194 /* save submatrix used in processor for next request */ 2195 if (call == MAT_INITIAL_MATRIX) { 2196 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2197 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2198 } 2199 2200 PetscFunctionReturn(0); 2201 } 2202 2203 EXTERN_C_BEGIN 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2206 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2207 { 2208 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2209 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2210 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2211 const PetscInt *JJ; 2212 PetscScalar *values; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 #if defined(PETSC_OPT_g) 2217 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2218 #endif 2219 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2220 o_nnz = d_nnz + m; 2221 2222 for (i=0; i<m; i++) { 2223 nnz = I[i+1]- I[i]; 2224 JJ = J + I[i]; 2225 nnz_max = PetscMax(nnz_max,nnz); 2226 #if defined(PETSC_OPT_g) 2227 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2228 #endif 2229 for (j=0; j<nnz; j++) { 2230 if (*JJ >= cstart) break; 2231 JJ++; 2232 } 2233 d = 0; 2234 for (; j<nnz; j++) { 2235 if (*JJ++ >= cend) break; 2236 d++; 2237 } 2238 d_nnz[i] = d; 2239 o_nnz[i] = nnz - d; 2240 } 2241 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2242 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2243 2244 if (v) values = (PetscScalar*)v; 2245 else { 2246 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2247 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2248 } 2249 2250 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2251 for (i=0; i<m; i++) { 2252 ii = i + rstart; 2253 nnz = I[i+1]- I[i]; 2254 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2255 } 2256 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2257 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2258 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2259 2260 if (!v) { 2261 ierr = PetscFree(values);CHKERRQ(ierr); 2262 } 2263 PetscFunctionReturn(0); 2264 } 2265 EXTERN_C_END 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2269 /*@C 2270 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2271 (the default parallel PETSc format). 2272 2273 Collective on MPI_Comm 2274 2275 Input Parameters: 2276 + A - the matrix 2277 . i - the indices into j for the start of each local row (starts with zero) 2278 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row 2279 - v - optional values in the matrix 2280 2281 Level: developer 2282 2283 .keywords: matrix, aij, compressed row, sparse, parallel 2284 2285 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2286 @*/ 2287 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2288 { 2289 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2290 2291 PetscFunctionBegin; 2292 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2293 if (f) { 2294 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2295 } 2296 PetscFunctionReturn(0); 2297 } 2298 2299 #undef __FUNCT__ 2300 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2301 /*@C 2302 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2303 (the default parallel PETSc format). For good matrix assembly performance 2304 the user should preallocate the matrix storage by setting the parameters 2305 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2306 performance can be increased by more than a factor of 50. 2307 2308 Collective on MPI_Comm 2309 2310 Input Parameters: 2311 + A - the matrix 2312 . d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix 2313 (same value is used for all local rows) 2314 . d_nnz - array containing the number of nonzeros in the various rows of the 2315 DIAGONAL portion of the local submatrix (possibly different for each row) 2316 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2317 The size of this array is equal to the number of local rows, i.e., 'm'. 2318 You must leave room for the diagonal entry even if it is zero. 2319 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local 2320 submatrix (same value is used for all local rows). 2321 - o_nnz - array containing the number of nonzeros in the various rows of the 2322 OFF-DIAGONAL portion of the local submatrix (possibly different for 2323 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2324 structure. The size of this array is equal to the number 2325 of local rows, i.e., 'm'. 2326 2327 If the *_nnz parameter is given then the *_nz parameter is ignored. 2328 2329 The AIJ format (also called the Yale sparse matrix format or 2330 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2331 storage. The stored row and column indices begin with zero. See the users manual for details. 2332 2333 The parallel matrix is partitioned such that the first m0 rows belong to 2334 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2335 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2336 2337 The DIAGONAL portion of the local submatrix of a processor can be defined 2338 as the submatrix which is obtained by extracting the part corresponding 2339 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2340 first row that belongs to the processor, and r2 is the last row belonging 2341 to this processor. This is a square mxm matrix. The remaining portion 2342 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2343 2344 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2345 2346 Example usage: 2347 2348 Consider the following 8x8 matrix with 34 non-zero values that is 2349 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2350 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2351 as follows: 2352 2353 .vb 2354 1 2 0 | 0 3 0 | 0 4 2355 Proc0 0 5 6 | 7 0 0 | 8 0 2356 9 0 10 | 11 0 0 | 12 0 2357 ------------------------------------- 2358 13 0 14 | 15 16 17 | 0 0 2359 Proc1 0 18 0 | 19 20 21 | 0 0 2360 0 0 0 | 22 23 0 | 24 0 2361 ------------------------------------- 2362 Proc2 25 26 27 | 0 0 28 | 29 0 2363 30 0 0 | 31 32 33 | 0 34 2364 .ve 2365 2366 This can be represented as a collection of submatrices as: 2367 2368 .vb 2369 A B C 2370 D E F 2371 G H I 2372 .ve 2373 2374 The submatrices A,B,C are owned by proc0, D,E,F are 2375 owned by proc1, and G,H,I are owned by proc2. 2376 2377 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2378 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2379 The 'M','N' parameters are 8,8, and have the same values on all procs. 2380 2381 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2382 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2383 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2384 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2385 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2386 matrix, and [DF] as another SeqAIJ matrix. 2387 2388 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2389 allocated for every row of the local diagonal submatrix, and o_nz 2390 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2391 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2392 row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2393 In this case, the values of d_nz,o_nz are: 2394 .vb 2395 proc0 : d_nz = 2, o_nz = 2 2396 proc1 : d_nz = 3, o_nz = 2 2397 proc2 : d_nz = 1, o_nz = 4 2398 .ve 2399 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2400 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2401 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2402 34 values. 2403 2404 When the d_nnz, o_nnz parameters are specified, the storage is specified 2405 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2406 In the above case the values for d_nnz,o_nnz are: 2407 .vb 2408 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2409 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2410 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2411 .ve 2412 Here the space allocated is the sum of all of the above values, i.e., 34, and 2413 hence the pre-allocation is perfect.
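   A minimal sketch of the corresponding calls, assuming the illustrative d_nnz/o_nnz
   arrays above have been built on each process (the variable names here are
   placeholders, not taken from this file):
.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve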
2414 2415 Level: intermediate 2416 2417 .keywords: matrix, aij, compressed row, sparse, parallel 2418 2419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2420 MPIAIJ 2421 @*/ 2422 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2423 { 2424 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2425 2426 PetscFunctionBegin; 2427 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2428 if (f) { 2429 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2430 } 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatCreateMPIAIJ" 2436 /*@C 2437 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2438 (the default parallel PETSc format). For good matrix assembly performance 2439 the user should preallocate the matrix storage by setting the parameters 2440 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2441 performance can be increased by more than a factor of 50. 2442 2443 Collective on MPI_Comm 2444 2445 Input Parameters: 2446 + comm - MPI communicator 2447 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2448 This value should be the same as the local size used in creating the 2449 y vector for the matrix-vector product y = Ax. 2450 . n - This value should be the same as the local size used in creating the 2451 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2452 calculated if N is given) For square matrices n is almost always m. 2453 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2454 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2455 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2456 (same value is used for all local rows) 2457 . d_nnz - array containing the number of nonzeros in the various rows of the 2458 DIAGONAL portion of the local submatrix (possibly different for each row) 2459 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2460 The size of this array is equal to the number of local rows, i.e 'm'. 2461 You must leave room for the diagonal entry even if it is zero. 2462 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2463 submatrix (same value is used for all local rows). 2464 - o_nnz - array containing the number of nonzeros in the various rows of the 2465 OFF-DIAGONAL portion of the local submatrix (possibly different for 2466 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2467 structure. The size of this array is equal to the number 2468 of local rows, i.e 'm'. 2469 2470 Output Parameter: 2471 . A - the matrix 2472 2473 Notes: 2474 If the *_nnz parameter is given then the *_nz parameter is ignored 2475 2476 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2477 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2478 storage requirements for this matrix. 2479 2480 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2481 processor than it must be used on all processors that share the object for 2482 that argument. 2483 2484 The user MUST specify either the local or global matrix dimensions 2485 (possibly both). 
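   An illustrative creation call for a square matrix (the local size m and the
   per-row fill estimates 5 and 2 are placeholder values, not taken from this file):
.vb
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve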
2486 2487 The parallel matrix is partitioned such that the first m0 rows belong to 2488 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2489 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2490 2491 The DIAGONAL portion of the local submatrix of a processor can be defined 2492 as the submatrix which is obtained by extracting the part corresponding 2493 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2494 first row that belongs to the processor, and r2 is the last row belonging 2495 to this processor. This is a square mxm matrix. The remaining portion 2496 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2497 2498 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2499 2500 When calling this routine with a single process communicator, a matrix of 2501 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2502 type of communicator, use the construction mechanism: 2503 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2504 2505 By default, this format uses inodes (identical nodes) when possible. 2506 We search for consecutive rows with the same nonzero structure, thereby 2507 reusing matrix information to achieve increased efficiency. 2508 2509 Options Database Keys: 2510 + -mat_aij_no_inode - Do not use inodes 2511 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2512 - -mat_aij_oneindex - Internally use indexing starting at 1 2513 rather than 0. Note that when calling MatSetValues(), 2514 the user still MUST index entries starting at 0! 2515 2516 2517 Example usage: 2518 2519 Consider the following 8x8 matrix with 34 non-zero values that is 2520 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2521 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2522 as follows: 2523 2524 .vb 2525 1 2 0 | 0 3 0 | 0 4 2526 Proc0 0 5 6 | 7 0 0 | 8 0 2527 9 0 10 | 11 0 0 | 12 0 2528 ------------------------------------- 2529 13 0 14 | 15 16 17 | 0 0 2530 Proc1 0 18 0 | 19 20 21 | 0 0 2531 0 0 0 | 22 23 0 | 24 0 2532 ------------------------------------- 2533 Proc2 25 26 27 | 0 0 28 | 29 0 2534 30 0 0 | 31 32 33 | 0 34 2535 .ve 2536 2537 This can be represented as a collection of submatrices as: 2538 2539 .vb 2540 A B C 2541 D E F 2542 G H I 2543 .ve 2544 2545 The submatrices A,B,C are owned by proc0, D,E,F are 2546 owned by proc1, and G,H,I are owned by proc2. 2547 2548 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2549 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2550 The 'M','N' parameters are 8,8, and have the same values on all procs. 2551 2552 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2553 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2554 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2555 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2556 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2557 matrix, and [DF] as another SeqAIJ matrix. 2558 2559 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2560 allocated for every row of the local diagonal submatrix, and o_nz 2561 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2562 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2563 row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
In this case, the values of d_nz,o_nz are: 2564 2565 .vb 2566 proc0 : d_nz = 2, o_nz = 2 2567 proc1 : d_nz = 3, o_nz = 2 2568 proc2 : d_nz = 1, o_nz = 4 2569 .ve 2570 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2571 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2572 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2573 34 values. 2574 2575 When the d_nnz, o_nnz parameters are specified, the storage is specified 2576 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2577 In the above case the values for d_nnz,o_nnz are: 2578 .vb 2579 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2580 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2581 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2582 .ve 2583 Here the space allocated is the sum of all of the above values, i.e., 34, and 2584 hence the pre-allocation is perfect. 2585 2586 Level: intermediate 2587 2588 .keywords: matrix, aij, compressed row, sparse, parallel 2589 2590 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2591 MPIAIJ 2592 @*/ 2593 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2594 { 2595 PetscErrorCode ierr; 2596 PetscMPIInt size; 2597 2598 PetscFunctionBegin; 2599 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2601 if (size > 1) { 2602 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2603 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2604 } else { 2605 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2606 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2607 } 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2613 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2614 { 2615 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2616 2617 PetscFunctionBegin; 2618 *Ad = a->A; 2619 *Ao = a->B; 2620 *colmap = a->garray; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2626 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2627 { 2628 PetscErrorCode ierr; 2629 PetscInt i; 2630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2631 2632 PetscFunctionBegin; 2633 if (coloring->ctype == IS_COLORING_LOCAL) { 2634 ISColoringValue *allcolors,*colors; 2635 ISColoring ocoloring; 2636 2637 /* set coloring for diagonal portion */ 2638 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2639 2640 /* set coloring for off-diagonal portion */ 2641 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2642 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2643 for (i=0; i<a->B->n; i++) { 2644 colors[i] = allcolors[a->garray[i]]; 2645 } 2646 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2647 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2648 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2649 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2650 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2651 ISColoringValue *colors; 2652 PetscInt *larray; 2653 ISColoring ocoloring; 2654 2655 /* set coloring for diagonal portion */ 2656 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2657 for (i=0; i<a->A->n; i++) { 2658 larray[i] = i +
a->cstart; 2659 } 2660 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2661 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2662 for (i=0; i<a->A->n; i++) { 2663 colors[i] = coloring->colors[larray[i]]; 2664 } 2665 ierr = PetscFree(larray);CHKERRQ(ierr); 2666 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2667 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2668 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2669 2670 /* set coloring for off-diagonal portion */ 2671 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2672 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2673 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2674 for (i=0; i<a->B->n; i++) { 2675 colors[i] = coloring->colors[larray[i]]; 2676 } 2677 ierr = PetscFree(larray);CHKERRQ(ierr); 2678 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2679 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2680 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2681 } else { 2682 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2683 } 2684 2685 PetscFunctionReturn(0); 2686 } 2687 2688 #if defined(PETSC_HAVE_ADIC) 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2691 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2694 PetscErrorCode ierr; 2695 2696 PetscFunctionBegin; 2697 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2698 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2699 PetscFunctionReturn(0); 2700 } 2701 #endif 2702 2703 #undef __FUNCT__ 2704 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2705 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2712 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatMerge" 2718 /*@C 2719 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2720 matrices from each processor 2721 2722 Collective on MPI_Comm 2723 2724 Input Parameters: 2725 + comm - the communicators the parallel matrix will live on 2726 . inmat - the input sequential matrices 2727 . n - number of local columns (or PETSC_DECIDE) 2728 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2729 2730 Output Parameter: 2731 . outmat - the parallel matrix generated 2732 2733 Level: advanced 2734 2735 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
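   Illustrative usage (the matrix names are placeholders, not taken from this file);
   note that, as implemented below, the input sequential matrix is destroyed by this
   routine once its entries have been copied into the parallel matrix:
.vb
   ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve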
2736 2737 @*/ 2738 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2739 { 2740 PetscErrorCode ierr; 2741 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2742 PetscInt *indx; 2743 PetscScalar *values; 2744 PetscMap columnmap,rowmap; 2745 2746 PetscFunctionBegin; 2747 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2748 /* 2749 PetscMPIInt rank; 2750 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2751 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2752 */ 2753 if (scall == MAT_INITIAL_MATRIX){ 2754 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2755 if (n == PETSC_DECIDE){ 2756 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2757 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2758 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2759 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2760 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2761 } 2762 2763 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2764 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2765 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2766 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2767 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2768 2769 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2770 for (i=0;i<m;i++) { 2771 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2772 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2773 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2774 } 2775 /* This routine will ONLY return MPIAIJ type matrix */ 2776 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2777 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2778 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2779 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2780 2781 } else if (scall == MAT_REUSE_MATRIX){ 2782 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2783 } else { 2784 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2785 } 2786 2787 for (i=0;i<m;i++) { 2788 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2789 I = i + rstart; 2790 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2791 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2792 } 2793 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2794 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2795 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2796 2797 PetscFunctionReturn(0); 2798 } 2799 2800 #undef __FUNCT__ 2801 #define __FUNCT__ "MatFileSplit" 2802 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2803 { 2804 PetscErrorCode ierr; 2805 PetscMPIInt rank; 2806 PetscInt m,N,i,rstart,nnz; 2807 size_t len; 2808 const PetscInt *indx; 2809 PetscViewer out; 2810 char *name; 2811 Mat B; 2812 const PetscScalar *values; 2813 2814 PetscFunctionBegin; 2815 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2816 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2817 /* Should this be the type of the diagonal block of A? 
*/ 2818 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2819 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2820 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2821 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2822 for (i=0;i<m;i++) { 2823 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2824 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2825 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2826 } 2827 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2828 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2829 2830 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2831 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2832 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2833 sprintf(name,"%s.%d",outfile,rank); 2834 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2835 ierr = PetscFree(name);CHKERRQ(ierr); 2836 ierr = MatView(B,out);CHKERRQ(ierr); 2837 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2838 ierr = MatDestroy(B);CHKERRQ(ierr); 2839 PetscFunctionReturn(0); 2840 } 2841 2842 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2845 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2846 { 2847 PetscErrorCode ierr; 2848 Mat_Merge_SeqsToMPI *merge; 2849 PetscObjectContainer container; 2850 2851 PetscFunctionBegin; 2852 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2853 if (container) { 2854 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2855 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2856 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2858 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2859 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2860 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2861 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2862 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2863 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2864 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2865 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2866 2867 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2868 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2869 /* merge is only valid when the container was actually found, so free it here */ ierr = PetscFree(merge);CHKERRQ(ierr); 2870 } 2871 2872 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2873 PetscFunctionReturn(0); 2874 } 2875 2876 #include "src/mat/utils/freespace.h" 2877 #include "petscbt.h" 2878 #undef __FUNCT__ 2879 #define __FUNCT__ "MatMerge_SeqsToMPI" 2880 /*@C 2881 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential 2882 matrices from each processor 2883 2884 Collective on MPI_Comm 2885 2886 Input Parameters: 2887 + comm - the communicator the parallel matrix will live on 2888 . seqmat - the input sequential matrix 2889 . m - number of local rows (or PETSC_DECIDE) 2890 . n - number of local columns (or PETSC_DECIDE) 2891 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2892 2893 Output Parameter: 2894 . mpimat - the parallel matrix generated 2895 2896 Level: advanced 2897 2898 Notes: 2899 The dimensions of the sequential matrix in each processor MUST be the same. 2900 The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be 2901 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
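   A symbolic and a numeric routine are defined below; a hedged sketch of calling
   that pair directly (m, n and the matrix variables are placeholders):
.vb
   ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
   ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve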
2902 @*/ 2903 static PetscEvent logkey_seqstompinum = 0; 2904 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2905 { 2906 PetscErrorCode ierr; 2907 MPI_Comm comm=mpimat->comm; 2908 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2909 PetscMPIInt size,rank,taga,*len_s; 2910 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2911 PetscInt proc,m; 2912 PetscInt **buf_ri,**buf_rj; 2913 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2914 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2915 MPI_Request *s_waits,*r_waits; 2916 MPI_Status *status; 2917 MatScalar *aa=a->a,**abuf_r,*ba_i; 2918 Mat_Merge_SeqsToMPI *merge; 2919 PetscObjectContainer container; 2920 2921 PetscFunctionBegin; 2922 if (!logkey_seqstompinum) { 2923 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2924 } 2925 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2926 2927 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2928 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2929 2930 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2931 if (container) { 2932 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2933 } 2934 bi = merge->bi; 2935 bj = merge->bj; 2936 buf_ri = merge->buf_ri; 2937 buf_rj = merge->buf_rj; 2938 2939 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2940 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2941 len_s = merge->len_s; 2942 2943 /* send and recv matrix values */ 2944 /*-----------------------------*/ 2945 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2946 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2947 2948 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2949 for (proc=0,k=0; proc<size; proc++){ 2950 if (!len_s[proc]) continue; 2951 i = owners[proc]; 2952 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2953 k++; 2954 } 2955 2956 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2957 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2958 ierr = PetscFree(status);CHKERRQ(ierr); 2959 2960 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2961 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2962 2963 /* insert mat values of mpimat */ 2964 /*----------------------------*/ 2965 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2966 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2967 nextrow = buf_ri_k + merge->nrecv; 2968 nextai = nextrow + merge->nrecv; 2969 2970 for (k=0; k<merge->nrecv; k++){ 2971 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2972 nrows = *(buf_ri_k[k]); 2973 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2974 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2975 } 2976 2977 /* set values of ba */ 2978 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2979 for (i=0; i<m; i++) { 2980 arow = owners[rank] + i; 2981 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2982 bnzi = bi[i+1] - bi[i]; 2983 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2984 2985 /* add local non-zero vals of this proc's seqmat into ba */ 2986 anzi = ai[arow+1] - ai[arow]; 2987 aj = a->j + ai[arow]; 2988 aa = a->a + ai[arow]; 2989 nextaj = 0; 2990 for 
(j=0; nextaj<anzi; j++){ 2991 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2992 ba_i[j] += aa[nextaj++]; 2993 } 2994 } 2995 2996 /* add received vals into ba */ 2997 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2998 /* i-th row */ 2999 if (i == *nextrow[k]) { 3000 anzi = *(nextai[k]+1) - *nextai[k]; 3001 aj = buf_rj[k] + *(nextai[k]); 3002 aa = abuf_r[k] + *(nextai[k]); 3003 nextaj = 0; 3004 for (j=0; nextaj<anzi; j++){ 3005 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3006 ba_i[j] += aa[nextaj++]; 3007 } 3008 } 3009 nextrow[k]++; nextai[k]++; 3010 } 3011 } 3012 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3013 } 3014 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3015 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3016 3017 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3018 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3019 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3020 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3021 PetscFunctionReturn(0); 3022 } 3023 static PetscEvent logkey_seqstompisym = 0; 3024 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3025 { 3026 PetscErrorCode ierr; 3027 Mat B_mpi; 3028 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3029 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3030 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3031 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3032 PetscInt len,proc,*dnz,*onz; 3033 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3034 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3035 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3036 MPI_Status *status; 3037 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3038 PetscBT lnkbt; 3039 Mat_Merge_SeqsToMPI *merge; 3040 PetscObjectContainer container; 3041 3042 PetscFunctionBegin; 3043 if (!logkey_seqstompisym) { 3044 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3045 } 3046 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3047 3048 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3049 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3050 3051 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3052 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3053 3054 /* determine row ownership */ 3055 /*---------------------------------------------------------*/ 3056 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3057 if (m == PETSC_DECIDE) { 3058 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3059 } else { 3060 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3061 } 3062 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3063 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3064 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3065 3066 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3067 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3068 3069 /* determine the number of messages to send, their lengths */ 3070 /*---------------------------------------------------------*/ 3071 len_s = merge->len_s; 3072 3073 len = 0; /* length of buf_si[] */ 3074 merge->nsend = 0; 3075 for (proc=0; proc<size; proc++){ 3076 len_si[proc] = 0; 3077 if (proc == rank){ 3078 len_s[proc] = 0; 3079 } else { 3080 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3081 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros (j- and a-array entries) to be sent to [proc] */ 3082 } 3083 if (len_s[proc]) { 3084 merge->nsend++; 3085 nrows = 0; 3086 for (i=owners[proc]; i<owners[proc+1]; i++){ 3087 if (ai[i+1] > ai[i]) nrows++; 3088 } 3089 len_si[proc] = 2*(nrows+1); 3090 len += len_si[proc]; 3091 } 3092 } 3093 3094 /* determine the number and length of messages to receive for ij-structure */ 3095 /*-------------------------------------------------------------------------*/ 3096 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3097 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3098 3099 /* post the Irecv of j-structure */ 3100 /*-------------------------------*/ 3101 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3102 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3103 3104 /* post the Isend of j-structure */ 3105 /*--------------------------------*/ 3106 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3107 sj_waits = si_waits + merge->nsend; 3108 3109 for (proc=0, k=0; proc<size; proc++){ 3110 if (!len_s[proc]) continue; 3111 i = owners[proc]; 3112 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3113 k++; 3114 } 3115 3116 /* receives and sends of j-structure are complete */ 3117 /*------------------------------------------------*/ 3118 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3119 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3120 3121 /* send and recv i-structure */ 3122 /*---------------------------*/ 3123 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3124 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3125 3126 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3127 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3128 for (proc=0,k=0; proc<size; proc++){ 3129 if (!len_s[proc]) continue; 3130 /* form outgoing message for i-structure: 3131 buf_si[0]: nrows to be sent 3132 [1:nrows]: row index (local to the receiving process) 3133 [nrows+1:2*nrows+1]: i-structure index 3134 */ 3135 /*-------------------------------------------*/ 3136 nrows = len_si[proc]/2 - 1; 3137 buf_si_i = buf_si + nrows+1; 3138 buf_si[0] = nrows; 3139 buf_si_i[0] = 0; 3140 nrows = 0; 3141 for (i=owners[proc]; i<owners[proc+1]; i++){ 3142 anzi = ai[i+1] - ai[i]; 3143 if (anzi) { 3144 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3145 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3146 nrows++; 3147 } 3148 } 3149 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3150 k++; 3151 buf_si += len_si[proc]; 3152 } 3153 3154 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3155 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3156 3157 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3158 for (i=0; i<merge->nrecv; i++){ 3159 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3160 } 3161 3162 ierr = PetscFree(len_si);CHKERRQ(ierr); 3163 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3164 ierr =
PetscFree(rj_waits);CHKERRQ(ierr); 3165 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3166 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3167 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3168 ierr = PetscFree(status);CHKERRQ(ierr); 3169 3170 /* compute a local seq matrix in each processor */ 3171 /*----------------------------------------------*/ 3172 /* allocate bi array and free space for accumulating nonzero column info */ 3173 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3174 bi[0] = 0; 3175 3176 /* create and initialize a linked list */ 3177 nlnk = N+1; 3178 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3179 3180 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3181 len = 0; 3182 len = ai[owners[rank+1]] - ai[owners[rank]]; 3183 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3184 current_space = free_space; 3185 3186 /* determine symbolic info for each local row */ 3187 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3188 nextrow = buf_ri_k + merge->nrecv; 3189 nextai = nextrow + merge->nrecv; 3190 for (k=0; k<merge->nrecv; k++){ 3191 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3192 nrows = *buf_ri_k[k]; 3193 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3194 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 3195 } 3196 3197 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3198 len = 0; 3199 for (i=0;i<m;i++) { 3200 bnzi = 0; 3201 /* add local non-zero cols of this proc's seqmat into lnk */ 3202 arow = owners[rank] + i; 3203 anzi = ai[arow+1] - ai[arow]; 3204 aj = a->j + ai[arow]; 3205 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3206 bnzi += nlnk; 3207 /* add received col data into lnk */ 3208 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3209 if (i == *nextrow[k]) { /* i-th row */ 3210 anzi = *(nextai[k]+1) - *nextai[k]; 3211 aj = buf_rj[k] + *nextai[k]; 3212 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3213 bnzi += nlnk; 3214 nextrow[k]++; nextai[k]++; 3215 } 3216 } 3217 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3218 3219 /* if free space is not available, make more free space */ 3220 if (current_space->local_remaining<bnzi) { 3221 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3222 nspacedouble++; 3223 } 3224 /* copy data into free space, then initialize lnk */ 3225 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3226 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3227 3228 current_space->array += bnzi; 3229 current_space->local_used += bnzi; 3230 current_space->local_remaining -= bnzi; 3231 3232 bi[i+1] = bi[i] + bnzi; 3233 } 3234 3235 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3236 3237 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3238 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3239 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3240 3241 /* create symbolic parallel matrix B_mpi */ 3242 /*---------------------------------------*/ 3243 if (n==PETSC_DECIDE) { 3244 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3245 } else { 3246 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3247 } 3248 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3249 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3250 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3251 3252 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3253 B_mpi->assembled = PETSC_FALSE; 3254 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3255 merge->bi = bi; 3256 merge->bj = bj; 3257 merge->buf_ri = buf_ri; 3258 merge->buf_rj = buf_rj; 3259 merge->coi = PETSC_NULL; 3260 merge->coj = PETSC_NULL; 3261 merge->owners_co = PETSC_NULL; 3262 3263 /* attach the supporting struct to B_mpi for reuse */ 3264 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3265 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3266 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3267 *mpimat = B_mpi; 3268 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3269 PetscFunctionReturn(0); 3270 } 3271 3272 static PetscEvent logkey_seqstompi = 0; 3273 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3274 { 3275 PetscErrorCode ierr; 3276 3277 PetscFunctionBegin; 3278 if (!logkey_seqstompi) { 3279 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3280 } 3281 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3282 if (scall == MAT_INITIAL_MATRIX){ 3283 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3284 } 3285 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3286 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3287 PetscFunctionReturn(0); 3288 } 3289 static PetscEvent logkey_getlocalmat = 0; 3290 #undef __FUNCT__ 3291 #define __FUNCT__ "MatGetLocalMat" 3292 /*@C 3293 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3294 3295 Not Collective 3296 3297 Input Parameters: 3298 + A - the matrix 3299 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3300 3301 Output Parameter: 3302 . 
A_loc - the local sequential matrix generated 3303 3304 Level: developer 3305 3306 @*/ 3307 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3308 { 3309 PetscErrorCode ierr; 3310 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3311 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3312 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3313 PetscScalar *aa=a->a,*ba=b->a,*ca; 3314 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3315 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3316 3317 PetscFunctionBegin; 3318 if (!logkey_getlocalmat) { 3319 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3320 } 3321 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3322 if (scall == MAT_INITIAL_MATRIX){ 3323 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3324 ci[0] = 0; 3325 for (i=0; i<am; i++){ 3326 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3327 } 3328 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3329 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3330 k = 0; 3331 for (i=0; i<am; i++) { 3332 ncols_o = bi[i+1] - bi[i]; 3333 ncols_d = ai[i+1] - ai[i]; 3334 /* off-diagonal portion of A */ 3335 for (jo=0; jo<ncols_o; jo++) { 3336 col = cmap[*bj]; 3337 if (col >= cstart) break; 3338 cj[k] = col; bj++; 3339 ca[k++] = *ba++; 3340 } 3341 /* diagonal portion of A */ 3342 for (j=0; j<ncols_d; j++) { 3343 cj[k] = cstart + *aj++; 3344 ca[k++] = *aa++; 3345 } 3346 /* off-diagonal portion of A */ 3347 for (j=jo; j<ncols_o; j++) { 3348 cj[k] = cmap[*bj++]; 3349 ca[k++] = *ba++; 3350 } 3351 } 3352 /* put together the new matrix */ 3353 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3354 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3355 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3356 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3357 mat->freedata = PETSC_TRUE; 3358 mat->nonew = 0; 3359 } else if (scall == MAT_REUSE_MATRIX){ 3360 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3361 ci = mat->i; cj = mat->j; ca = mat->a; 3362 for (i=0; i<am; i++) { 3363 /* off-diagonal portion of A */ 3364 ncols_o = bi[i+1] - bi[i]; 3365 for (jo=0; jo<ncols_o; jo++) { 3366 col = cmap[*bj]; 3367 if (col >= cstart) break; 3368 *ca++ = *ba++; bj++; 3369 } 3370 /* diagonal portion of A */ 3371 ncols_d = ai[i+1] - ai[i]; 3372 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3373 /* off-diagonal portion of A */ 3374 for (j=jo; j<ncols_o; j++) { 3375 *ca++ = *ba++; bj++; 3376 } 3377 } 3378 } else { 3379 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3380 } 3381 3382 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3383 PetscFunctionReturn(0); 3384 } 3385 3386 static PetscEvent logkey_getlocalmatcondensed = 0; 3387 #undef __FUNCT__ 3388 #define __FUNCT__ "MatGetLocalMatCondensed" 3389 /*@C 3390 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3391 3392 Not Collective 3393 3394 Input Parameters: 3395 + A - the matrix 3396 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3397 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3398 3399 Output Parameter: 3400 . 
A_loc - the local sequential matrix generated 3401 3402 Level: developer 3403 3404 @*/ 3405 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3406 { 3407 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3408 PetscErrorCode ierr; 3409 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3410 IS isrowa,iscola; 3411 Mat *aloc; 3412 3413 PetscFunctionBegin; 3414 if (!logkey_getlocalmatcondensed) { 3415 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3416 } 3417 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3418 if (!row){ 3419 start = a->rstart; end = a->rend; 3420 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3421 } else { 3422 isrowa = *row; 3423 } 3424 if (!col){ 3425 start = a->cstart; 3426 cmap = a->garray; 3427 nzA = a->A->n; 3428 nzB = a->B->n; 3429 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3430 ncols = 0; 3431 for (i=0; i<nzB; i++) { 3432 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3433 else break; 3434 } 3435 imark = i; 3436 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3437 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3438 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3439 ierr = PetscFree(idx);CHKERRQ(ierr); 3440 } else { 3441 iscola = *col; 3442 } 3443 if (scall != MAT_INITIAL_MATRIX){ 3444 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3445 aloc[0] = *A_loc; 3446 } 3447 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3448 *A_loc = aloc[0]; 3449 ierr = PetscFree(aloc);CHKERRQ(ierr); 3450 if (!row){ 3451 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3452 } 3453 if (!col){ 3454 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3455 } 3456 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3457 PetscFunctionReturn(0); 3458 } 3459 3460 static PetscEvent logkey_GetBrowsOfAcols = 0; 3461 #undef __FUNCT__ 3462 #define __FUNCT__ "MatGetBrowsOfAcols" 3463 /*@C 3464 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3465 3466 Collective on Mat 3467 3468 Input Parameters: 3469 + A,B - the matrices in mpiaij format 3470 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3471 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3472 3473 Output Parameter: 3474 + rowb, colb - index sets of rows and columns of B to extract 3475 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3476 - B_seq - the sequential matrix generated 3477 3478 Level: developer 3479 3480 @*/ 3481 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3482 { 3483 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3484 PetscErrorCode ierr; 3485 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3486 IS isrowb,iscolb; 3487 Mat *bseq; 3488 3489 PetscFunctionBegin; 3490 if (a->cstart != b->rstart || a->cend != b->rend){ 3491 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3492 } 3493 if (!logkey_GetBrowsOfAcols) { 3494 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3495 } 3496 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3497 3498 if (scall == MAT_INITIAL_MATRIX){ 3499 start = a->cstart; 3500 cmap = a->garray; 3501 nzA = a->A->n; 3502 nzB = a->B->n; 3503 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3504 ncols = 0; 3505 for (i=0; i<nzB; i++) { /* row < local row index */ 3506 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3507 else break; 3508 } 3509 imark = i; 3510 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3511 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3512 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3513 ierr = PetscFree(idx);CHKERRQ(ierr); 3514 *brstart = imark; 3515 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3516 } else { 3517 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3518 isrowb = *rowb; iscolb = *colb; 3519 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3520 bseq[0] = *B_seq; 3521 } 3522 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3523 *B_seq = bseq[0]; 3524 ierr = PetscFree(bseq);CHKERRQ(ierr); 3525 if (!rowb){ 3526 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3527 } else { 3528 *rowb = isrowb; 3529 } 3530 if (!colb){ 3531 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3532 } else { 3533 *colb = iscolb; 3534 } 3535 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 static PetscEvent logkey_GetBrowsOfAocols = 0; 3540 #undef __FUNCT__ 3541 #define __FUNCT__ "MatGetBrowsOfAoCols" 3542 /*@C 3543 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3544 of the OFF-DIAGONAL portion of local A 3545 3546 Collective on Mat 3547 3548 Input Parameters: 3549 + A,B - the matrices in mpiaij format 3550 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3551 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3552 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3553 3554 Output Parameter: 3555 . B_oth - the sequential matrix generated 3556 3557 Level: developer 3558 3559 @*/ 3560 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3561 { 3562 VecScatter_MPI_General *gen_to,*gen_from; 3563 PetscErrorCode ierr; 3564 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3565 Mat_SeqAIJ *b_oth; 3566 VecScatter ctx=a->Mvctx; 3567 MPI_Comm comm=ctx->comm; 3568 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3569 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3570 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3571 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3572 MPI_Request *rwaits,*swaits; 3573 MPI_Status *sstatus,rstatus; 3574 PetscInt *cols; 3575 PetscScalar *vals; 3576 PetscMPIInt j; 3577 3578 PetscFunctionBegin; 3579 if (a->cstart != b->rstart || a->cend != b->rend){ 3580 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3581 } 3582 if (!logkey_GetBrowsOfAocols) { 3583 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr); 3584 } 3585 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3586 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3587 3588 gen_to = (VecScatter_MPI_General*)ctx->todata; 3589 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3590 rvalues = gen_from->values; /* receive buffer: will hold the lengths of the rows sent to this process */ 3591 svalues = gen_to->values; /* send buffer: holds the lengths of the rows this process sends */ 3592 nrecvs = gen_from->n; 3593 nsends = gen_to->n; 3594 rwaits = gen_from->requests; 3595 swaits = gen_to->requests; 3596 srow = gen_to->indices; /* local row index to be sent */ 3597 rstarts = gen_from->starts; 3598 sstarts = gen_to->starts; 3599 rprocs = gen_from->procs; 3600 sprocs = gen_to->procs; 3601 sstatus = gen_to->sstatus; 3602 3603 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3604 if (scall == MAT_INITIAL_MATRIX){ 3605 /* i-array */ 3606 /*---------*/ 3607 /* post receives */ 3608 for (i=0; i<nrecvs; i++){ 3609 rowlen = (PetscInt*)rvalues + rstarts[i]; 3610 nrows = rstarts[i+1]-rstarts[i]; 3611 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3612 } 3613 3614 /* pack the outgoing message */ 3615 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3616 rstartsj = sstartsj + nsends +1; 3617 sstartsj[0] = 0; rstartsj[0] = 0; 3618 len = 0; /* total length of j or a array to be sent */ 3619 k = 0; 3620 for (i=0; i<nsends; i++){ 3621 rowlen = (PetscInt*)svalues + sstarts[i]; 3622 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3623 for (j=0; j<nrows; j++) { 3624 row = srow[k] + b->rowners[rank]; /* global row idx */ 3625 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3626 len += rowlen[j]; 3627 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3628 k++; 3629 } 3630 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3631 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3632 } 3633 /* recvs and sends of i-array are completed */ 3634 i = nrecvs; 3635 while (i--) { 3636 ierr =
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3637 } 3638 if (nsends) { 3639 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3640 } 3641 /* allocate buffers for sending j and a arrays */ 3642 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3643 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3644 3645 /* create i-array of B_oth */ 3646 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3647 b_othi[0] = 0; 3648 len = 0; /* total length of j or a array to be received */ 3649 k = 0; 3650 for (i=0; i<nrecvs; i++){ 3651 rowlen = (PetscInt*)rvalues + rstarts[i]; 3652 nrows = rstarts[i+1]-rstarts[i]; 3653 for (j=0; j<nrows; j++) { 3654 b_othi[k+1] = b_othi[k] + rowlen[j]; 3655 len += rowlen[j]; k++; 3656 } 3657 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3658 } 3659 3660 /* allocate space for j and a arrays of B_oth */ 3661 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3662 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3663 3664 /* j-array */ 3665 /*---------*/ 3666 /* post receives of j-array */ 3667 for (i=0; i<nrecvs; i++){ 3668 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3669 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3670 } 3671 k = 0; 3672 for (i=0; i<nsends; i++){ 3673 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3674 bufJ = bufj+sstartsj[i]; 3675 for (j=0; j<nrows; j++) { 3676 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3677 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3678 for (l=0; l<ncols; l++){ 3679 *bufJ++ = cols[l]; 3680 } 3681 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3682 } 3683 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3684 } 3685 3686 /* recvs and sends of j-array are completed */ 3687 i = nrecvs; 3688 while (i--) { 3689 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3690 } 3691 if (nsends) { 3692 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3693 } 3694 } else if (scall == MAT_REUSE_MATRIX){ 3695 sstartsj = *startsj; 3696 rstartsj = sstartsj + nsends +1; 3697 bufa = *bufa_ptr; 3698 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3699 b_otha = b_oth->a; 3700 } else { 3701 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3702 } 3703 3704 /* a-array */ 3705 /*---------*/ 3706 /* post receives of a-array */ 3707 for (i=0; i<nrecvs; i++){ 3708 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3709 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3710 } 3711 k = 0; 3712 for (i=0; i<nsends; i++){ 3713 nrows = sstarts[i+1]-sstarts[i]; 3714 bufA = bufa+sstartsj[i]; 3715 for (j=0; j<nrows; j++) { 3716 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3717 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3718 for (l=0; l<ncols; l++){ 3719 *bufA++ = vals[l]; 3720 } 3721 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3722 3723 } 3724 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3725 } 3726 /* recvs and sends of a-array are completed */ 3727 i = nrecvs; 3728 while (i--) { 3729 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3730 } 3731 if (nsends) {
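/* Added comment: make sure every outgoing a-array message has completed before the send buffer is freed or handed back to the caller below */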
3732 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3733 } 3734 3735 if (scall == MAT_INITIAL_MATRIX){ 3736 /* put together the new matrix */ 3737 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3738 3739 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3740 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3741 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3742 b_oth->freedata = PETSC_TRUE; 3743 b_oth->nonew = 0; 3744 3745 ierr = PetscFree(bufj);CHKERRQ(ierr); 3746 if (!startsj || !bufa_ptr){ 3747 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3748 ierr = PetscFree(bufa);CHKERRQ(ierr); 3749 } else { 3750 *startsj = sstartsj; 3751 *bufa_ptr = bufa; 3752 } 3753 } 3754 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3755 3756 PetscFunctionReturn(0); 3757 } 3758 3759 /*MC 3760 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3761 3762 Options Database Keys: 3763 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3764 3765 Level: beginner 3766 3767 .seealso: MatCreateMPIAIJ 3768 M*/ 3769 3770 EXTERN_C_BEGIN 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "MatCreate_MPIAIJ" 3773 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3774 { 3775 Mat_MPIAIJ *b; 3776 PetscErrorCode ierr; 3777 PetscInt i; 3778 PetscMPIInt size; 3779 3780 PetscFunctionBegin; 3781 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3782 3783 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3784 B->data = (void*)b; 3785 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3786 B->factor = 0; 3787 B->bs = 1; 3788 B->assembled = PETSC_FALSE; 3789 B->mapping = 0; 3790 3791 B->insertmode = NOT_SET_VALUES; 3792 b->size = size; 3793 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3794 3795 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3796 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3797 3798 /* the information in the maps duplicates the information computed below; eventually 3799 we should remove the duplicate information that is not contained in the maps */ 3800 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3801 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3802 3803 /* build local table of row and column ownerships */ 3804 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3805 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3806 b->cowners = b->rowners + b->size + 2; 3807 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3808 b->rowners[0] = 0; 3809 for (i=2; i<=b->size; i++) { 3810 b->rowners[i] += b->rowners[i-1]; 3811 } 3812 b->rstart = b->rowners[b->rank]; 3813 b->rend = b->rowners[b->rank+1]; 3814 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3815 b->cowners[0] = 0; 3816 for (i=2; i<=b->size; i++) { 3817 b->cowners[i] += b->cowners[i-1]; 3818 } 3819 b->cstart = b->cowners[b->rank]; 3820 b->cend = b->cowners[b->rank+1]; 3821 3822 /* build cache for off-processor entries generated during MatSetValues() */ 3823 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3824 b->donotstash = PETSC_FALSE; 3825 b->colmap = 0; 3826 b->garray = 0; 3827 b->roworiented = PETSC_TRUE; 3828 3829 /* stuff used for matrix vector multiply */ 3830 b->lvec = PETSC_NULL; 3831
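/* Added comment: Mvctx is the VecScatter that gathers the needed off-process entries of the multiply input vector into lvec during MatMult(); both are created later, when the matrix is assembled */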
b->Mvctx = PETSC_NULL; 3832 3833 /* stuff for MatGetRow() */ 3834 b->rowindices = 0; 3835 b->rowvalues = 0; 3836 b->getrowactive = PETSC_FALSE; 3837 3838 /* Explicitly create 2 MATSEQAIJ matrices. */ 3839 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3840 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3841 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3842 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3843 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3844 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3845 3846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3847 "MatStoreValues_MPIAIJ", 3848 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3850 "MatRetrieveValues_MPIAIJ", 3851 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3853 "MatGetDiagonalBlock_MPIAIJ", 3854 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3856 "MatIsTranspose_MPIAIJ", 3857 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3858 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3859 "MatMPIAIJSetPreallocation_MPIAIJ", 3860 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3861 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3862 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3863 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3864 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3865 "MatDiagonalScaleLocal_MPIAIJ", 3866 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3867 PetscFunctionReturn(0); 3868 } 3869 EXTERN_C_END 3870
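/*
   Illustrative usage sketch (not part of the original source): merging per-process
   MATSEQAIJ pieces into one parallel MATMPIAIJ matrix with MatMerge_SeqsToMPI().
   The matrix seqmat below is hypothetical; each process is assumed to supply a
   sequential matrix with the full global row dimension, and contributions from
   different processes to the same entry are summed by the numeric phase.

     Mat            seqmat,Ampi;
     PetscErrorCode ierr;

     ... assemble seqmat as a MATSEQAIJ matrix on each process ...

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                               MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);

     ... later merges with the same nonzero pattern can reuse the symbolic data
         attached to Ampi by passing MAT_REUSE_MATRIX ...

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                               MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
     ierr = MatDestroy(Ampi);CHKERRQ(ierr);
*/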