#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
   at a slightly higher hash-table cost; without it, it is not scalable (each
   processor has an order-N integer array), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

/*
   Inserts or adds one (row,col,value) entry into the diagonal (A) block of the
   local matrix; when the row has no free slot left, the whole local storage is
   reallocated with CHUNKSIZE extra slots for that row.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp = aj + ai[row] + shift; ap = aa + ai[row] +
shift; \ 94 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 95 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 96 a->maxnz += CHUNKSIZE; \ 97 a->reallocs++; \ 98 } \ 99 N = nrow++ - 1; a->nz++; \ 100 /* shift up all the later entries in this row */ \ 101 for (ii=N; ii>=_i; ii--) { \ 102 rp[ii+1] = rp[ii]; \ 103 ap[ii+1] = ap[ii]; \ 104 } \ 105 rp[_i] = col1; \ 106 ap[_i] = value; \ 107 a_noinsert: ; \ 108 ailen[row] = nrow; \ 109 } 110 111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 112 { \ 113 \ 114 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 115 rmax = bimax[row]; nrow = bilen[row]; \ 116 col1 = col - shift; \ 117 \ 118 low = 0; high = nrow; \ 119 while (high-low > 5) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > col) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > col1) break; \ 126 if (rp[_i] == col1) { \ 127 if (addv == ADD_VALUES) ap[_i] += value; \ 128 else ap[_i] = value; \ 129 goto b_noinsert; \ 130 } \ 131 } \ 132 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 133 if (nonew == 1) goto b_noinsert; \ 134 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 if (nrow >= rmax) { \ 136 /* there is no extra room in row, therefore enlarge */ \ 137 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 138 PetscScalar *new_a; \ 139 \ 140 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 141 \ 142 /* malloc new storage space */ \ 143 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 144 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 145 new_j = (PetscInt*)(new_a + new_nz); \ 146 new_i = new_j + new_nz; \ 147 \ 148 /* copy over old data into new slots */ \ 149 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 150 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 151 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 152 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 153 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 154 len*sizeof(PetscInt));CHKERRQ(ierr); \ 155 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 157 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 158 /* free up old matrix storage */ \ 159 \ 160 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 161 if (!b->singlemalloc) { \ 162 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 163 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 164 } \ 165 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 166 b->singlemalloc = PETSC_TRUE; \ 167 \ 168 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 169 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 170 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 171 b->maxnz += CHUNKSIZE; \ 172 b->reallocs++; \ 173 } \ 174 N = nrow++ - 1; b->nz++; \ 175 /* shift up all the later entries in this row */ \ 176 for (ii=N; ii>=_i; ii--) { \ 177 rp[ii+1] = rp[ii]; \ 178 ap[ii+1] = ap[ii]; \ 179 } \ 180 rp[_i] = col1; \ 181 ap[_i] = value; \ 182 b_noinsert: ; \ 183 bilen[row] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIAIJ" 188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 191 PetscScalar value; 192 PetscErrorCode ierr; 193 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 194 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 195 PetscTruth roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 201 PetscScalar *aa = a->a; 202 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 205 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 206 PetscScalar *ba = b->a; 207 208 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 209 PetscInt nonew = a->nonew,shift=0; 210 PetscScalar *ap; 211 212 PetscFunctionBegin; 213 for (i=0; i<m; i++) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_DEBUG) 216 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for (j=0; j<n; j++) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 225 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 226 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 227 } else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_DEBUG) 229 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 230 #endif 231 else { 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] 
< 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 378 379 /* determine if any processor has disassembled, if so we 
must also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change, then we know that
     no processor disassembled, and thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess b because aij->B was changed */
      b = (Mat_SeqAIJ*)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix is square
     and the user wishes to set the diagonal, we use separate code so that
     MatSetValues() is not called for each diagonal entry, which would allocate
     new memory on every call, trigger lots of mallocs, and slow things down.
540 541 Contributed by: Mathew Knepley 542 */ 543 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 544 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 545 if (diag && (l->A->M == l->A->N)) { 546 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 547 } else if (diag) { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 550 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 552 } 553 for (i = 0; i < slen; i++) { 554 row = lrows[i] + rstart; 555 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 } else { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 } 562 ierr = ISDestroy(istmp);CHKERRQ(ierr); 563 ierr = PetscFree(lrows);CHKERRQ(ierr); 564 565 /* wait on sends */ 566 if (nsends) { 567 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 568 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 569 ierr = PetscFree(send_status);CHKERRQ(ierr); 570 } 571 ierr = PetscFree(send_waits);CHKERRQ(ierr); 572 ierr = PetscFree(svalues);CHKERRQ(ierr); 573 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatMult_MPIAIJ" 579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 580 { 581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 582 PetscErrorCode ierr; 583 PetscInt nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 PetscErrorCode ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 PetscErrorCode ierr; 618 PetscTruth merged; 619 620 PetscFunctionBegin; 621 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 622 /* do nondiagonal part */ 623 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 624 if (!merged) { 625 /* send it on its way */ 626 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 627 /* do local part */ 628 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 629 /* receive remote parts: note this assumes the values are not actually */ 630 /* added in yy until the next line, */ 631 ierr = 
VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 632 } else { 633 /* do local part */ 634 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 635 /* send it on its way */ 636 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 /* values actually were received in the Begin() but we need to call this nop */ 638 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 639 } 640 PetscFunctionReturn(0); 641 } 642 643 EXTERN_C_BEGIN 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 646 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 647 { 648 MPI_Comm comm; 649 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 650 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 651 IS Me,Notme; 652 PetscErrorCode ierr; 653 PetscInt M,N,first,last,*notme,i; 654 PetscMPIInt size; 655 656 PetscFunctionBegin; 657 658 /* Easy test: symmetric diagonal block */ 659 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 660 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 661 if (!*f) PetscFunctionReturn(0); 662 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 663 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 664 if (size == 1) PetscFunctionReturn(0); 665 666 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 667 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 668 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 669 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 670 for (i=0; i<first; i++) notme[i] = i; 671 for (i=last; i<M; i++) notme[i-last+first] = i; 672 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 673 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 674 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 675 Aoff = Aoffs[0]; 676 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 677 Boff = Boffs[0]; 678 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 679 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 680 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 681 ierr = ISDestroy(Me);CHKERRQ(ierr); 682 ierr = ISDestroy(Notme);CHKERRQ(ierr); 683 684 PetscFunctionReturn(0); 685 } 686 EXTERN_C_END 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 690 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 691 { 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 PetscErrorCode ierr; 694 695 PetscFunctionBegin; 696 /* do nondiagonal part */ 697 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 698 /* send it on its way */ 699 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 700 /* do local part */ 701 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 702 /* receive remote parts */ 703 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 /* 708 This only works correctly for square matrices where the subblock A->A is the 709 diagonal block 710 */ 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 713 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 714 { 715 PetscErrorCode ierr; 716 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 717 718 PetscFunctionBegin; 719 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 720 if (a->rstart != a->cstart 
|| a->rend != a->cend) { 721 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 722 } 723 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatScale_MPIAIJ" 729 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 730 { 731 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 736 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPIAIJ" 742 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 753 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 757 #else 758 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 759 #endif 760 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 761 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 762 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 763 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 764 ierr = PetscFree(aij);CHKERRQ(ierr); 765 766 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 768 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 769 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 770 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 771 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 772 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatView_MPIAIJ_Binary" 778 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 779 { 780 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 781 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 782 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 783 PetscErrorCode ierr; 784 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 785 int fd; 786 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 787 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 788 PetscScalar *column_values; 789 790 PetscFunctionBegin; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 793 nz = A->nz + B->nz; 794 if (!rank) { 795 header[0] = MAT_FILE_COOKIE; 796 header[1] = mat->M; 797 header[2] = mat->N; 798 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 800 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 801 /* get largest number of rows any processor has */ 802 rlen = mat->m; 803 ierr = 
PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr =
MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 892 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 893 } 894 } else { 895 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 896 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 897 } 898 ierr = PetscFree(column_values);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 904 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 905 { 906 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 907 PetscErrorCode ierr; 908 PetscMPIInt rank = aij->rank,size = aij->size; 909 PetscTruth isdraw,iascii,isbinary; 910 PetscViewer sviewer; 911 PetscViewerFormat format; 912 913 PetscFunctionBegin; 914 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 915 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 916 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 917 if (iascii) { 918 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 919 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 920 MatInfo info; 921 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 922 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 923 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 924 if (!a->inode.size) { 925 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 926 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 927 } else { 928 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 929 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 930 } 931 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 932 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 933 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 934 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 935 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 936 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 937 PetscFunctionReturn(0); 938 } else if (format == PETSC_VIEWER_ASCII_INFO) { 939 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 940 if (a->inode.size) { 941 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr); 942 } else { 943 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 944 } 945 PetscFunctionReturn(0); 946 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 947 PetscFunctionReturn(0); 948 } 949 } else if (isbinary) { 950 if (size == 1) { 951 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 952 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 953 } else { 954 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 955 } 956 PetscFunctionReturn(0); 957 } else if (isdraw) { 958 PetscDraw draw; 959 PetscTruth isnull; 960 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 961 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 962 } 963 964 if (size == 1) { 965 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 966 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 967 } else { 968 /* assemble the entire matrix onto first processor. */ 969 Mat A; 970 Mat_SeqAIJ *Aloc; 971 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 972 PetscScalar *a; 973 974 if (!rank) { 975 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 976 } else { 977 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 978 } 979 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 980 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 981 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 982 PetscLogObjectParent(mat,A); 983 984 /* copy over the A part */ 985 Aloc = (Mat_SeqAIJ*)aij->A->data; 986 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 987 row = aij->rstart; 988 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 989 for (i=0; i<m; i++) { 990 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 991 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 992 } 993 aj = Aloc->j; 994 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 995 996 /* copy over the B part */ 997 Aloc = (Mat_SeqAIJ*)aij->B->data; 998 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 999 row = aij->rstart; 1000 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1001 ct = cols; 1002 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1003 for (i=0; i<m; i++) { 1004 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1005 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1006 } 1007 ierr = PetscFree(ct);CHKERRQ(ierr); 1008 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1009 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1010 /* 1011 Everyone has to call to draw the matrix since the graphics waits are 1012 synchronized across all processors that share the PetscDraw object 1013 */ 1014 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1015 if (!rank) { 1016 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1017 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1018 } 1019 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1020 ierr = MatDestroy(A);CHKERRQ(ierr); 1021 } 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatView_MPIAIJ" 1027 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1028 { 1029 PetscErrorCode ierr; 1030 PetscTruth iascii,isdraw,issocket,isbinary; 1031 1032 PetscFunctionBegin; 1033 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1034 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1035 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1036 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1037 if (iascii || isdraw || isbinary || issocket) { 1038 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1039 } else { 1040 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1041 } 1042 PetscFunctionReturn(0); 1043 } 1044 1045 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "MatRelax_MPIAIJ" 1049 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1050 { 1051 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1052 PetscErrorCode ierr; 1053 Vec bb1; 1054 PetscScalar mone=-1.0; 1055 1056 PetscFunctionBegin; 1057 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1058 1059 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1060 1061 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 1067 while (its--) { 1068 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1080 if (flag & SOR_ZERO_INITIAL_GUESS) { 1081 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1082 its--; 1083 } 1084 while (its--) { 1085 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1087 1088 /* update rhs: bb1 = bb - B*x */ 1089 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1090 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1091 1092 /* local sweep */ 1093 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1094 CHKERRQ(ierr); 1095 } 1096 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1097 if (flag & SOR_ZERO_INITIAL_GUESS) { 1098 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1099 its--; 1100 } 1101 while (its--) { 1102 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1103 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1104 1105 /* update rhs: bb1 = bb - B*x */ 1106 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1107 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1108 1109 /* local sweep */ 1110 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1111 CHKERRQ(ierr); 1112 } 1113 } else { 1114 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1115 } 1116 1117 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1123 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1124 { 1125 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1126 Mat A = mat->A,B = mat->B; 1127 PetscErrorCode ierr; 1128 PetscReal isend[5],irecv[5]; 1129 1130 PetscFunctionBegin; 1131 info->block_size = 1.0; 1132 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1133 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1134 isend[3] = info->memory; isend[4] = info->mallocs; 1135 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1136 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1137 isend[3] += info->memory; isend[4] += info->mallocs; 1138 if (flag == MAT_LOCAL) { 1139 
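    /*
       Note (added for clarity): isend[] packs the five statistics
       (nz_used, nz_allocated, nz_unneeded, memory, mallocs) summed over the
       diagonal (A) and off-diagonal (B) blocks above.  For MAT_LOCAL they are
       reported as-is here; the MAT_GLOBAL_MAX and MAT_GLOBAL_SUM branches below
       reduce the same five values across the communicator with MPI_MAX or MPI_SUM.
    */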
info->nz_used = isend[0]; 1140 info->nz_allocated = isend[1]; 1141 info->nz_unneeded = isend[2]; 1142 info->memory = isend[3]; 1143 info->mallocs = isend[4]; 1144 } else if (flag == MAT_GLOBAL_MAX) { 1145 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1146 info->nz_used = irecv[0]; 1147 info->nz_allocated = irecv[1]; 1148 info->nz_unneeded = irecv[2]; 1149 info->memory = irecv[3]; 1150 info->mallocs = irecv[4]; 1151 } else if (flag == MAT_GLOBAL_SUM) { 1152 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1153 info->nz_used = irecv[0]; 1154 info->nz_allocated = irecv[1]; 1155 info->nz_unneeded = irecv[2]; 1156 info->memory = irecv[3]; 1157 info->mallocs = irecv[4]; 1158 } 1159 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1160 info->fill_ratio_needed = 0; 1161 info->factor_mallocs = 0; 1162 info->rows_global = (double)matin->M; 1163 info->columns_global = (double)matin->N; 1164 info->rows_local = (double)matin->m; 1165 info->columns_local = (double)matin->N; 1166 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatSetOption_MPIAIJ" 1172 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1173 { 1174 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 switch (op) { 1179 case MAT_NO_NEW_NONZERO_LOCATIONS: 1180 case MAT_YES_NEW_NONZERO_LOCATIONS: 1181 case MAT_COLUMNS_UNSORTED: 1182 case MAT_COLUMNS_SORTED: 1183 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1184 case MAT_KEEP_ZEROED_ROWS: 1185 case MAT_NEW_NONZERO_LOCATION_ERR: 1186 case MAT_USE_INODES: 1187 case MAT_DO_NOT_USE_INODES: 1188 case MAT_IGNORE_ZERO_ENTRIES: 1189 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1190 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1191 break; 1192 case MAT_ROW_ORIENTED: 1193 a->roworiented = PETSC_TRUE; 1194 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1195 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1196 break; 1197 case MAT_ROWS_SORTED: 1198 case MAT_ROWS_UNSORTED: 1199 case MAT_YES_NEW_DIAGONALS: 1200 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1201 break; 1202 case MAT_COLUMN_ORIENTED: 1203 a->roworiented = PETSC_FALSE; 1204 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1205 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1206 break; 1207 case MAT_IGNORE_OFF_PROC_ENTRIES: 1208 a->donotstash = PETSC_TRUE; 1209 break; 1210 case MAT_NO_NEW_DIAGONALS: 1211 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1212 case MAT_SYMMETRIC: 1213 case MAT_STRUCTURALLY_SYMMETRIC: 1214 case MAT_HERMITIAN: 1215 case MAT_SYMMETRY_ETERNAL: 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 break; 1218 case MAT_NOT_SYMMETRIC: 1219 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1220 case MAT_NOT_HERMITIAN: 1221 case MAT_NOT_SYMMETRY_ETERNAL: 1222 break; 1223 default: 1224 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatGetRow_MPIAIJ" 1231 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1232 { 1233 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1234 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1235 PetscErrorCode ierr; 1236 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1237 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1238 PetscInt *cmap,*idx_p; 1239 1240 PetscFunctionBegin; 1241 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1242 mat->getrowactive = PETSC_TRUE; 1243 1244 
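  /*
     The two pieces of a local row live in separate sequential matrices:
     aij->A holds the columns inside [cstart,cend) and aij->B holds the
     (compressed) off-diagonal columns.  The merged row handed back to the
     caller is therefore assembled in a scratch buffer, allocated lazily below,
     sized to the longest local row, and reused by every later call.

     A minimal caller-side sketch (not from this file; it assumes only the
     public MatGetRow()/MatRestoreRow() interface matching the signature of
     this routine, plus the usual MatGetOwnershipRange() call):

        PetscInt    row,ncols,*cols,rstart,rend;
        PetscScalar *vals;

        ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
        for (row=rstart; row<rend; row++) {
          ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
          ...  use ncols, cols[] (global column numbers), vals[]  ...
          ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        }

     Only locally owned rows may be requested, as the range check below enforces.
  */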
if (!mat->rowvalues && (idx || v)) { 1245 /* 1246 allocate enough space to hold information from the longest row. 1247 */ 1248 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1249 PetscInt max = 1,tmp; 1250 for (i=0; i<matin->m; i++) { 1251 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1252 if (max < tmp) { max = tmp; } 1253 } 1254 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1255 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1256 } 1257 1258 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1259 lrow = row - rstart; 1260 1261 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1262 if (!v) {pvA = 0; pvB = 0;} 1263 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1264 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1265 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1266 nztot = nzA + nzB; 1267 1268 cmap = mat->garray; 1269 if (v || idx) { 1270 if (nztot) { 1271 /* Sort by increasing column numbers, assuming A and B already sorted */ 1272 PetscInt imark = -1; 1273 if (v) { 1274 *v = v_p = mat->rowvalues; 1275 for (i=0; i<nzB; i++) { 1276 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1277 else break; 1278 } 1279 imark = i; 1280 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1281 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1282 } 1283 if (idx) { 1284 *idx = idx_p = mat->rowindices; 1285 if (imark > -1) { 1286 for (i=0; i<imark; i++) { 1287 idx_p[i] = cmap[cworkB[i]]; 1288 } 1289 } else { 1290 for (i=0; i<nzB; i++) { 1291 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1292 else break; 1293 } 1294 imark = i; 1295 } 1296 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1297 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1298 } 1299 } else { 1300 if (idx) *idx = 0; 1301 if (v) *v = 0; 1302 } 1303 } 1304 *nz = nztot; 1305 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1306 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1312 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 1316 PetscFunctionBegin; 1317 if (!aij->getrowactive) { 1318 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1319 } 1320 aij->getrowactive = PETSC_FALSE; 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatNorm_MPIAIJ" 1326 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1327 { 1328 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1329 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,cstart = aij->cstart; 1332 PetscReal sum = 0.0; 1333 PetscScalar *v; 1334 1335 PetscFunctionBegin; 1336 if (aij->size == 1) { 1337 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1338 } else { 1339 if (type == NORM_FROBENIUS) { 1340 v = amat->a; 1341 for (i=0; i<amat->nz; i++) { 1342 #if defined(PETSC_USE_COMPLEX) 1343 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1344 #else 1345 sum += (*v)*(*v); v++; 1346 #endif 1347 } 1348 v = bmat->a; 1349 for (i=0; i<bmat->nz; i++) { 1350 #if defined(PETSC_USE_COMPLEX) 1351 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1352 #else 1353 sum += (*v)*(*v); v++; 1354 #endif 1355 } 1356 
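      /*
         Each process has now summed |a_ij|^2 over its own A and B entries;
         the Frobenius norm is the square root of the global sum, so one
         MPI_SUM reduction followed by the sqrt() below finishes the job.
      */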
ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1357 *norm = sqrt(*norm); 1358 } else if (type == NORM_1) { /* max column norm */ 1359 PetscReal *tmp,*tmp2; 1360 PetscInt *jj,*garray = aij->garray; 1361 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1362 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1363 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1364 *norm = 0.0; 1365 v = amat->a; jj = amat->j; 1366 for (j=0; j<amat->nz; j++) { 1367 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1368 } 1369 v = bmat->a; jj = bmat->j; 1370 for (j=0; j<bmat->nz; j++) { 1371 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1372 } 1373 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1374 for (j=0; j<mat->N; j++) { 1375 if (tmp2[j] > *norm) *norm = tmp2[j]; 1376 } 1377 ierr = PetscFree(tmp);CHKERRQ(ierr); 1378 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1379 } else if (type == NORM_INFINITY) { /* max row norm */ 1380 PetscReal ntemp = 0.0; 1381 for (j=0; j<aij->A->m; j++) { 1382 v = amat->a + amat->i[j]; 1383 sum = 0.0; 1384 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1385 sum += PetscAbsScalar(*v); v++; 1386 } 1387 v = bmat->a + bmat->i[j]; 1388 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1389 sum += PetscAbsScalar(*v); v++; 1390 } 1391 if (sum > ntemp) ntemp = sum; 1392 } 1393 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1394 } else { 1395 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1396 } 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatTranspose_MPIAIJ" 1403 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1404 { 1405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1406 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1407 PetscErrorCode ierr; 1408 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1409 Mat B; 1410 PetscScalar *array; 1411 1412 PetscFunctionBegin; 1413 if (!matout && M != N) { 1414 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1415 } 1416 1417 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1418 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1419 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1420 1421 /* copy over the A part */ 1422 Aloc = (Mat_SeqAIJ*)a->A->data; 1423 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1424 row = a->rstart; 1425 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1426 for (i=0; i<m; i++) { 1427 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1428 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1429 } 1430 aj = Aloc->j; 1431 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1432 1433 /* copy over the B part */ 1434 Aloc = (Mat_SeqAIJ*)a->B->data; 1435 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1436 row = a->rstart; 1437 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1438 ct = cols; 1439 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1440 for (i=0; i<m; i++) { 1441 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1442 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1443 } 1444 ierr = PetscFree(ct);CHKERRQ(ierr); 1445 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 if (matout) { 1448 *matout = B; 1449 } else { 1450 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 
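/*
   Usage sketch for the routine above (assuming only what its signature and the
   check at its top imply): MatTranspose(A,&At) builds a new transposed matrix,
   while MatTranspose(A,PETSC_NULL) transposes in place, which is permitted only
   when A is square.

      Mat At;
      ierr = MatTranspose(A,&At);CHKERRQ(ierr);
      ...  use At  ...
      ierr = MatDestroy(At);CHKERRQ(ierr);
*/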
1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1457 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1458 { 1459 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1460 Mat a = aij->A,b = aij->B; 1461 PetscErrorCode ierr; 1462 PetscInt s1,s2,s3; 1463 1464 PetscFunctionBegin; 1465 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1466 if (rr) { 1467 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1468 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1469 /* Overlap communication with computation. */ 1470 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1471 } 1472 if (ll) { 1473 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1474 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1475 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1476 } 1477 /* scale the diagonal block */ 1478 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1479 1480 if (rr) { 1481 /* Do a scatter end and then right scale the off-diagonal block */ 1482 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1483 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1484 } 1485 1486 PetscFunctionReturn(0); 1487 } 1488 1489 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1492 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1493 { 1494 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1495 PetscErrorCode ierr; 1496 1497 PetscFunctionBegin; 1498 if (!a->rank) { 1499 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1506 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1507 { 1508 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1509 PetscErrorCode ierr; 1510 1511 PetscFunctionBegin; 1512 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1513 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1518 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1519 { 1520 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatEqual_MPIAIJ" 1530 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1531 { 1532 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1533 Mat a,b,c,d; 1534 PetscTruth flg; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 a = matA->A; b = matA->B; 1539 c = matB->A; d = matB->B; 1540 1541 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1542 if (flg) { 1543 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1544 } 1545 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatCopy_MPIAIJ" 1551 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1552 { 1553 PetscErrorCode ierr; 1554 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1555 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1556 1557 PetscFunctionBegin; 1558 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1559 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1560 /* because of the column compression in the off-processor part of the matrix a->B, 1561 the number of columns in a->B and b->B may be different, hence we cannot call 1562 the MatCopy() directly on the two parts. If need be, we can provide a more 1563 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1564 then copying the submatrices */ 1565 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1566 } else { 1567 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1568 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1569 } 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1575 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1576 { 1577 PetscErrorCode ierr; 1578 1579 PetscFunctionBegin; 1580 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #include "petscblaslapack.h" 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatAXPY_MPIAIJ" 1587 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1588 { 1589 PetscErrorCode ierr; 1590 PetscInt i; 1591 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1592 PetscBLASInt bnz,one=1; 1593 Mat_SeqAIJ *x,*y; 1594 1595 PetscFunctionBegin; 1596 if (str == SAME_NONZERO_PATTERN) { 1597 x = (Mat_SeqAIJ *)xx->A->data; 1598 y = (Mat_SeqAIJ *)yy->A->data; 1599 bnz = (PetscBLASInt)x->nz; 1600 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1601 x = (Mat_SeqAIJ *)xx->B->data; 1602 y = (Mat_SeqAIJ *)yy->B->data; 1603 bnz = (PetscBLASInt)x->nz; 1604 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1605 } else if (str == SUBSET_NONZERO_PATTERN) { 1606 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1607 1608 x = (Mat_SeqAIJ *)xx->B->data; 1609 y = (Mat_SeqAIJ *)yy->B->data; 1610 if (y->xtoy && y->XtoY != xx->B) { 1611 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1612 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1613 } 1614 if (!y->xtoy) { /* get xtoy */ 1615 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1616 y->XtoY = xx->B; 1617 } 1618 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1619 } else { 1620 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 /* -------------------------------------------------------------------*/ 1626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1627 MatGetRow_MPIAIJ, 1628 MatRestoreRow_MPIAIJ, 1629 MatMult_MPIAIJ, 1630 /* 4*/ MatMultAdd_MPIAIJ, 1631 MatMultTranspose_MPIAIJ, 1632 MatMultTransposeAdd_MPIAIJ, 1633 0, 1634 0, 1635 0, 1636 /*10*/ 0, 1637 0, 1638 0, 1639 MatRelax_MPIAIJ, 1640 MatTranspose_MPIAIJ, 1641 /*15*/ MatGetInfo_MPIAIJ, 1642 MatEqual_MPIAIJ, 1643 MatGetDiagonal_MPIAIJ, 1644 MatDiagonalScale_MPIAIJ, 1645 MatNorm_MPIAIJ, 1646 /*20*/ MatAssemblyBegin_MPIAIJ, 1647 MatAssemblyEnd_MPIAIJ, 1648 0, 1649 MatSetOption_MPIAIJ, 1650 MatZeroEntries_MPIAIJ, 1651 /*25*/ MatZeroRows_MPIAIJ, 1652 0, 1653 0, 1654 0, 1655 0, 1656 /*30*/ MatSetUpPreallocation_MPIAIJ, 1657 0, 1658 0, 1659 0, 1660 0, 1661 /*35*/ MatDuplicate_MPIAIJ, 1662 0, 1663 0, 1664 0, 1665 0, 1666 /*40*/ MatAXPY_MPIAIJ, 1667 MatGetSubMatrices_MPIAIJ, 1668 MatIncreaseOverlap_MPIAIJ, 1669 MatGetValues_MPIAIJ, 1670 MatCopy_MPIAIJ, 1671 /*45*/ MatPrintHelp_MPIAIJ, 1672 MatScale_MPIAIJ, 1673 0, 1674 0, 1675 0, 1676 /*50*/ MatSetBlockSize_MPIAIJ, 1677 
0, 1678 0, 1679 0, 1680 0, 1681 /*55*/ MatFDColoringCreate_MPIAIJ, 1682 0, 1683 MatSetUnfactored_MPIAIJ, 1684 0, 1685 0, 1686 /*60*/ MatGetSubMatrix_MPIAIJ, 1687 MatDestroy_MPIAIJ, 1688 MatView_MPIAIJ, 1689 MatGetPetscMaps_Petsc, 1690 0, 1691 /*65*/ 0, 1692 0, 1693 0, 1694 0, 1695 0, 1696 /*70*/ 0, 1697 0, 1698 MatSetColoring_MPIAIJ, 1699 #if defined(PETSC_HAVE_ADIC) 1700 MatSetValuesAdic_MPIAIJ, 1701 #else 1702 0, 1703 #endif 1704 MatSetValuesAdifor_MPIAIJ, 1705 /*75*/ 0, 1706 0, 1707 0, 1708 0, 1709 0, 1710 /*80*/ 0, 1711 0, 1712 0, 1713 0, 1714 /*84*/ MatLoad_MPIAIJ, 1715 0, 1716 0, 1717 0, 1718 0, 1719 0, 1720 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1721 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1722 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1723 MatPtAP_Basic, 1724 MatPtAPSymbolic_MPIAIJ, 1725 /*95*/ MatPtAPNumeric_MPIAIJ, 1726 0, 1727 0, 1728 0, 1729 0, 1730 /*100*/0, 1731 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1732 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1733 }; 1734 1735 /* ----------------------------------------------------------------------------------------*/ 1736 1737 EXTERN_C_BEGIN 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1740 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1741 { 1742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1747 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 EXTERN_C_END 1751 1752 EXTERN_C_BEGIN 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1755 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1756 { 1757 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1762 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 EXTERN_C_END 1766 1767 #include "petscpc.h" 1768 EXTERN_C_BEGIN 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1771 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1772 { 1773 Mat_MPIAIJ *b; 1774 PetscErrorCode ierr; 1775 PetscInt i; 1776 1777 PetscFunctionBegin; 1778 B->preallocated = PETSC_TRUE; 1779 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1780 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1781 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1782 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1783 if (d_nnz) { 1784 for (i=0; i<B->m; i++) { 1785 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1786 } 1787 } 1788 if (o_nnz) { 1789 for (i=0; i<B->m; i++) { 1790 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1791 } 1792 } 1793 b = (Mat_MPIAIJ*)B->data; 1794 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1795 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1796 1797 PetscFunctionReturn(0); 1798 } 1799 EXTERN_C_END 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1803 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1804 { 1805 Mat mat; 1806 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1807 PetscErrorCode ierr; 1808 1809 PetscFunctionBegin; 1810 *newmat = 0; 1811 ierr = 
MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1812 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1813 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1814 a = (Mat_MPIAIJ*)mat->data; 1815 1816 mat->factor = matin->factor; 1817 mat->bs = matin->bs; 1818 mat->assembled = PETSC_TRUE; 1819 mat->insertmode = NOT_SET_VALUES; 1820 mat->preallocated = PETSC_TRUE; 1821 1822 a->rstart = oldmat->rstart; 1823 a->rend = oldmat->rend; 1824 a->cstart = oldmat->cstart; 1825 a->cend = oldmat->cend; 1826 a->size = oldmat->size; 1827 a->rank = oldmat->rank; 1828 a->donotstash = oldmat->donotstash; 1829 a->roworiented = oldmat->roworiented; 1830 a->rowindices = 0; 1831 a->rowvalues = 0; 1832 a->getrowactive = PETSC_FALSE; 1833 1834 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1835 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1836 if (oldmat->colmap) { 1837 #if defined (PETSC_USE_CTABLE) 1838 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1839 #else 1840 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1841 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1842 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1843 #endif 1844 } else a->colmap = 0; 1845 if (oldmat->garray) { 1846 PetscInt len; 1847 len = oldmat->B->n; 1848 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1849 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1850 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1851 } else a->garray = 0; 1852 1853 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1854 PetscLogObjectParent(mat,a->lvec); 1855 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1856 PetscLogObjectParent(mat,a->Mvctx); 1857 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1858 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1859 PetscLogObjectParent(mat,a->A); 1860 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1861 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1862 PetscLogObjectParent(mat,a->B); 1863 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1864 *newmat = mat; 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #include "petscsys.h" 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatLoad_MPIAIJ" 1872 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1873 { 1874 Mat A; 1875 PetscScalar *vals,*svals; 1876 MPI_Comm comm = ((PetscObject)viewer)->comm; 1877 MPI_Status status; 1878 PetscErrorCode ierr; 1879 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1880 PetscInt i,nz,j,rstart,rend; 1881 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1882 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1883 PetscInt cend,cstart,n,*rowners; 1884 int fd; 1885 1886 PetscFunctionBegin; 1887 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1888 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1889 if (!rank) { 1890 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1891 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1892 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1893 } 1894 1895 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1896 M = header[1]; N = header[2]; 1897 /* determine ownership of all rows */ 1898 m = M/size + ((M % size) > rank); 1899 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1900 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1901 rowners[0] = 0; 1902 for (i=2; i<=size; i++) { 1903 rowners[i] += rowners[i-1]; 1904 } 1905 rstart = rowners[rank]; 1906 rend = rowners[rank+1]; 1907 1908 /* distribute row lengths to all processors */ 1909 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1910 if (!rank) { 1911 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1912 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1913 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1914 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1915 for (j=0; j<m; j++) { 1916 procsnz[0] += ourlens[j]; 1917 } 1918 for (i=1; i<size; i++) { 1919 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1920 /* calculate the number of nonzeros on each processor */ 1921 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1922 procsnz[i] += rowlengths[j]; 1923 } 1924 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1925 } 1926 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1927 } else { 1928 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1929 } 1930 1931 if (!rank) { 1932 /* determine max buffer needed and allocate it */ 1933 maxnz = 0; 1934 for (i=0; i<size; i++) { 1935 maxnz = PetscMax(maxnz,procsnz[i]); 1936 } 1937 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1938 1939 /* read in my part of the matrix column indices */ 1940 nz = procsnz[0]; 1941 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1942 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1943 1944 /* read in every one elses and ship off */ 1945 for (i=1; i<size; i++) { 1946 nz = procsnz[i]; 1947 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1948 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1949 } 1950 ierr = PetscFree(cols);CHKERRQ(ierr); 1951 } else { 1952 /* determine buffer space needed for message */ 1953 nz = 0; 1954 for (i=0; i<m; i++) { 1955 nz += ourlens[i]; 1956 } 1957 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1958 1959 /* receive message of column indices*/ 1960 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1961 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1962 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1963 } 1964 1965 /* determine column ownership if matrix is not square */ 1966 if (N != M) { 1967 n = N/size + ((N % size) > rank); 1968 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1969 cstart = cend - n; 1970 } else { 1971 cstart = rstart; 1972 cend = rend; 1973 n = cend - cstart; 1974 } 1975 1976 /* loop over local rows, determining number of off diagonal entries */ 1977 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1978 jj = 0; 1979 for (i=0; i<m; i++) { 1980 for (j=0; j<ourlens[i]; j++) { 1981 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1982 jj++; 1983 } 1984 } 1985 1986 /* create our matrix */ 1987 for (i=0; i<m; i++) { 1988 ourlens[i] -= offlens[i]; 1989 } 1990 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 1991 ierr = MatSetType(A,type);CHKERRQ(ierr); 1992 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1993 1994 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1995 for (i=0; i<m; i++) { 1996 ourlens[i] 
+= offlens[i]; 1997 } 1998 1999 if (!rank) { 2000 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2001 2002 /* read in my part of the matrix numerical values */ 2003 nz = procsnz[0]; 2004 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2005 2006 /* insert into matrix */ 2007 jj = rstart; 2008 smycols = mycols; 2009 svals = vals; 2010 for (i=0; i<m; i++) { 2011 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2012 smycols += ourlens[i]; 2013 svals += ourlens[i]; 2014 jj++; 2015 } 2016 2017 /* read in other processors and ship out */ 2018 for (i=1; i<size; i++) { 2019 nz = procsnz[i]; 2020 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2021 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2022 } 2023 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2024 } else { 2025 /* receive numeric values */ 2026 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2027 2028 /* receive message of values*/ 2029 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2030 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2031 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2032 2033 /* insert into matrix */ 2034 jj = rstart; 2035 smycols = mycols; 2036 svals = vals; 2037 for (i=0; i<m; i++) { 2038 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2039 smycols += ourlens[i]; 2040 svals += ourlens[i]; 2041 jj++; 2042 } 2043 } 2044 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2045 ierr = PetscFree(vals);CHKERRQ(ierr); 2046 ierr = PetscFree(mycols);CHKERRQ(ierr); 2047 ierr = PetscFree(rowners);CHKERRQ(ierr); 2048 2049 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2051 *newmat = A; 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2057 /* 2058 Not great since it makes two copies of the submatrix, first an SeqAIJ 2059 in local and then by concatenating the local matrices the end result. 
2060 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2061 */ 2062 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2063 { 2064 PetscErrorCode ierr; 2065 PetscMPIInt rank,size; 2066 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2067 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2068 Mat *local,M,Mreuse; 2069 PetscScalar *vwork,*aa; 2070 MPI_Comm comm = mat->comm; 2071 Mat_SeqAIJ *aij; 2072 2073 2074 PetscFunctionBegin; 2075 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2076 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2077 2078 if (call == MAT_REUSE_MATRIX) { 2079 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2080 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2081 local = &Mreuse; 2082 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2083 } else { 2084 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2085 Mreuse = *local; 2086 ierr = PetscFree(local);CHKERRQ(ierr); 2087 } 2088 2089 /* 2090 m - number of local rows 2091 n - number of columns (same on all processors) 2092 rstart - first row in new global matrix generated 2093 */ 2094 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2095 if (call == MAT_INITIAL_MATRIX) { 2096 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2097 ii = aij->i; 2098 jj = aij->j; 2099 2100 /* 2101 Determine the number of non-zeros in the diagonal and off-diagonal 2102 portions of the matrix in order to do correct preallocation 2103 */ 2104 2105 /* first get start and end of "diagonal" columns */ 2106 if (csize == PETSC_DECIDE) { 2107 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2108 if (mglobal == n) { /* square matrix */ 2109 nlocal = m; 2110 } else { 2111 nlocal = n/size + ((n % size) > rank); 2112 } 2113 } else { 2114 nlocal = csize; 2115 } 2116 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2117 rstart = rend - nlocal; 2118 if (rank == size - 1 && rend != n) { 2119 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2120 } 2121 2122 /* next, compute all the lengths */ 2123 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2124 olens = dlens + m; 2125 for (i=0; i<m; i++) { 2126 jend = ii[i+1] - ii[i]; 2127 olen = 0; 2128 dlen = 0; 2129 for (j=0; j<jend; j++) { 2130 if (*jj < rstart || *jj >= rend) olen++; 2131 else dlen++; 2132 jj++; 2133 } 2134 olens[i] = olen; 2135 dlens[i] = dlen; 2136 } 2137 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2138 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2139 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2140 ierr = PetscFree(dlens);CHKERRQ(ierr); 2141 } else { 2142 PetscInt ml,nl; 2143 2144 M = *newmat; 2145 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2146 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2147 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2148 /* 2149 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2150 rather than the slower MatSetValues(). 
2151 */ 2152 M->was_assembled = PETSC_TRUE; 2153 M->assembled = PETSC_FALSE; 2154 } 2155 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2156 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2157 ii = aij->i; 2158 jj = aij->j; 2159 aa = aij->a; 2160 for (i=0; i<m; i++) { 2161 row = rstart + i; 2162 nz = ii[i+1] - ii[i]; 2163 cwork = jj; jj += nz; 2164 vwork = aa; aa += nz; 2165 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2166 } 2167 2168 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2169 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2170 *newmat = M; 2171 2172 /* save submatrix used in processor for next request */ 2173 if (call == MAT_INITIAL_MATRIX) { 2174 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2175 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2176 } 2177 2178 PetscFunctionReturn(0); 2179 } 2180 2181 EXTERN_C_BEGIN 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2184 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2185 { 2186 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2187 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2188 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2189 const PetscInt *JJ; 2190 PetscScalar *values; 2191 PetscErrorCode ierr; 2192 2193 PetscFunctionBegin; 2194 #if defined(PETSC_OPT_g) 2195 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2196 #endif 2197 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2198 o_nnz = d_nnz + m; 2199 2200 for (i=0; i<m; i++) { 2201 nnz = I[i+1]- I[i]; 2202 JJ = J + I[i]; 2203 nnz_max = PetscMax(nnz_max,nnz); 2204 #if defined(PETSC_OPT_g) 2205 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2206 #endif 2207 for (j=0; j<nnz; j++) { 2208 if (*JJ >= cstart) break; 2209 JJ++; 2210 } 2211 d = 0; 2212 for (; j<nnz; j++) { 2213 if (*JJ++ >= cend) break; 2214 d++; 2215 } 2216 d_nnz[i] = d; 2217 o_nnz[i] = nnz - d; 2218 } 2219 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2220 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2221 2222 if (v) values = (PetscScalar*)v; 2223 else { 2224 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2225 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2226 } 2227 2228 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2229 for (i=0; i<m; i++) { 2230 ii = i + rstart; 2231 nnz = I[i+1]- I[i]; 2232 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2233 } 2234 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2235 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2236 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2237 2238 if (!v) { 2239 ierr = PetscFree(values);CHKERRQ(ierr); 2240 } 2241 PetscFunctionReturn(0); 2242 } 2243 EXTERN_C_END 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2247 /*@C 2248 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2249 (the default parallel PETSc format). 2250 2251 Collective on MPI_Comm 2252 2253 Input Parameters: 2254 + A - the matrix 2255 . i - the indices into j for the start of each local row (starts with zero) 2256 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2257 - v - optional values in the matrix 2258 2259 Level: developer 2260 2261 .keywords: matrix, aij, compressed row, sparse, parallel 2262 2263 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2264 @*/ 2265 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2266 { 2267 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2268 2269 PetscFunctionBegin; 2270 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2271 if (f) { 2272 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2273 } 2274 PetscFunctionReturn(0); 2275 } 2276 2277 #undef __FUNCT__ 2278 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2279 /*@C 2280 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2281 (the default parallel PETSc format). For good matrix assembly performance 2282 the user should preallocate the matrix storage by setting the parameters 2283 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2284 performance can be increased by more than a factor of 50. 2285 2286 Collective on MPI_Comm 2287 2288 Input Parameters: 2289 + A - the matrix 2290 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2291 (same value is used for all local rows) 2292 . d_nnz - array containing the number of nonzeros in the various rows of the 2293 DIAGONAL portion of the local submatrix (possibly different for each row) 2294 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2295 The size of this array is equal to the number of local rows, i.e 'm'. 2296 You must leave room for the diagonal entry even if it is zero. 2297 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2298 submatrix (same value is used for all local rows). 2299 - o_nnz - array containing the number of nonzeros in the various rows of the 2300 OFF-DIAGONAL portion of the local submatrix (possibly different for 2301 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2302 structure. The size of this array is equal to the number 2303 of local rows, i.e 'm'. 2304 2305 If the *_nnz parameter is given then the *_nz parameter is ignored 2306 2307 The AIJ format (also called the Yale sparse matrix format or 2308 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2309 storage. The stored row and column indices begin with zero. See the users manual for details. 2310 2311 The parallel matrix is partitioned such that the first m0 rows belong to 2312 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2313 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2314 2315 The DIAGONAL portion of the local submatrix of a processor can be defined 2316 as the submatrix which is obtained by extraction the part corresponding 2317 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2318 first row that belongs to the processor, and r2 is the last row belonging 2319 to the this processor. This is a square mxm matrix. The remaining portion 2320 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2321 2322 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
2323 2324 Example usage: 2325 2326 Consider the following 8x8 matrix with 34 non-zero values, that is 2327 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2328 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2329 as follows: 2330 2331 .vb 2332 1 2 0 | 0 3 0 | 0 4 2333 Proc0 0 5 6 | 7 0 0 | 8 0 2334 9 0 10 | 11 0 0 | 12 0 2335 ------------------------------------- 2336 13 0 14 | 15 16 17 | 0 0 2337 Proc1 0 18 0 | 19 20 21 | 0 0 2338 0 0 0 | 22 23 0 | 24 0 2339 ------------------------------------- 2340 Proc2 25 26 27 | 0 0 28 | 29 0 2341 30 0 0 | 31 32 33 | 0 34 2342 .ve 2343 2344 This can be represented as a collection of submatrices as: 2345 2346 .vb 2347 A B C 2348 D E F 2349 G H I 2350 .ve 2351 2352 The submatrices A,B,C are owned by proc0, D,E,F are 2353 owned by proc1, and G,H,I are owned by proc2. 2354 2355 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2356 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2357 The 'M','N' parameters are 8,8, and have the same values on all procs. 2358 2359 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2360 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2361 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2362 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2363 part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ 2364 matrix and [DF] as another SeqAIJ matrix. 2365 2366 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2367 allocated for every row of the local diagonal submatrix, and o_nz 2368 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2369 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 2370 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 2371 In this case, the values of d_nz,o_nz are: 2372 .vb 2373 proc0 : d_nz = 2, o_nz = 2 2374 proc1 : d_nz = 3, o_nz = 2 2375 proc2 : d_nz = 1, o_nz = 4 2376 .ve 2377 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2378 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2379 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2380 34 values. 2381 2382 When the d_nnz, o_nnz parameters are specified, the storage is specified 2383 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2384 In the above case the values for d_nnz,o_nnz are: 2385 .vb 2386 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2387 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2388 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2389 .ve 2390 Here the space allocated is the sum of all of the above values, i.e., 34, and 2391 hence the preallocation is perfect.
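   A typical call sequence for the example above is sketched below (error checking with
   CHKERRQ() is omitted for brevity, and the d_nnz/o_nnz arrays are assumed to hold the
   per-row counts listed above):
.vb
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
      ... loop over the locally owned rows, calling MatSetValues() ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve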
2392 2393 Level: intermediate 2394 2395 .keywords: matrix, aij, compressed row, sparse, parallel 2396 2397 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2398 MPIAIJ 2399 @*/ 2400 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2401 { 2402 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2403 2404 PetscFunctionBegin; 2405 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2406 if (f) { 2407 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2408 } 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatCreateMPIAIJ" 2414 /*@C 2415 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2416 (the default parallel PETSc format). For good matrix assembly performance 2417 the user should preallocate the matrix storage by setting the parameters 2418 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2419 performance can be increased by more than a factor of 50. 2420 2421 Collective on MPI_Comm 2422 2423 Input Parameters: 2424 + comm - MPI communicator 2425 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2426 This value should be the same as the local size used in creating the 2427 y vector for the matrix-vector product y = Ax. 2428 . n - This value should be the same as the local size used in creating the 2429 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2430 calculated if N is given) For square matrices n is almost always m. 2431 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2432 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2433 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2434 (same value is used for all local rows) 2435 . d_nnz - array containing the number of nonzeros in the various rows of the 2436 DIAGONAL portion of the local submatrix (possibly different for each row) 2437 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2438 The size of this array is equal to the number of local rows, i.e 'm'. 2439 You must leave room for the diagonal entry even if it is zero. 2440 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2441 submatrix (same value is used for all local rows). 2442 - o_nnz - array containing the number of nonzeros in the various rows of the 2443 OFF-DIAGONAL portion of the local submatrix (possibly different for 2444 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2445 structure. The size of this array is equal to the number 2446 of local rows, i.e 'm'. 2447 2448 Output Parameter: 2449 . A - the matrix 2450 2451 Notes: 2452 If the *_nnz parameter is given then the *_nz parameter is ignored 2453 2454 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2455 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2456 storage requirements for this matrix. 2457 2458 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2459 processor than it must be used on all processors that share the object for 2460 that argument. 2461 2462 The user MUST specify either the local or global matrix dimensions 2463 (possibly both). 
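   For example, a single call that lets PETSc choose the local sizes could be (a sketch;
   the d_nz/o_nz values 5 and 2 are only illustrative, not recommendations):
.vb
      MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve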
2464 2465 The parallel matrix is partitioned such that the first m0 rows belong to 2466 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2467 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2468 2469 The DIAGONAL portion of the local submatrix of a processor can be defined 2470 as the submatrix which is obtained by extraction the part corresponding 2471 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2472 first row that belongs to the processor, and r2 is the last row belonging 2473 to the this processor. This is a square mxm matrix. The remaining portion 2474 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2475 2476 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2477 2478 When calling this routine with a single process communicator, a matrix of 2479 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2480 type of communicator, use the construction mechanism: 2481 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2482 2483 By default, this format uses inodes (identical nodes) when possible. 2484 We search for consecutive rows with the same nonzero structure, thereby 2485 reusing matrix information to achieve increased efficiency. 2486 2487 Options Database Keys: 2488 + -mat_aij_no_inode - Do not use inodes 2489 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2490 - -mat_aij_oneindex - Internally use indexing starting at 1 2491 rather than 0. Note that when calling MatSetValues(), 2492 the user still MUST index entries starting at 0! 2493 2494 2495 Example usage: 2496 2497 Consider the following 8x8 matrix with 34 non-zero values, that is 2498 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2499 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2500 as follows: 2501 2502 .vb 2503 1 2 0 | 0 3 0 | 0 4 2504 Proc0 0 5 6 | 7 0 0 | 8 0 2505 9 0 10 | 11 0 0 | 12 0 2506 ------------------------------------- 2507 13 0 14 | 15 16 17 | 0 0 2508 Proc1 0 18 0 | 19 20 21 | 0 0 2509 0 0 0 | 22 23 0 | 24 0 2510 ------------------------------------- 2511 Proc2 25 26 27 | 0 0 28 | 29 0 2512 30 0 0 | 31 32 33 | 0 34 2513 .ve 2514 2515 This can be represented as a collection of submatrices as: 2516 2517 .vb 2518 A B C 2519 D E F 2520 G H I 2521 .ve 2522 2523 Where the submatrices A,B,C are owned by proc0, D,E,F are 2524 owned by proc1, G,H,I are owned by proc2. 2525 2526 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2527 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2528 The 'M','N' parameters are 8,8, and have the same values on all procs. 2529 2530 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2531 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2532 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2533 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2534 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2535 matrix, ans [DF] as another SeqAIJ matrix. 2536 2537 When d_nz, o_nz parameters are specified, d_nz storage elements are 2538 allocated for every row of the local diagonal submatrix, and o_nz 2539 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2540 One way to choose d_nz and o_nz is to use the max nonzerors per local 2541 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2542 In this case, the values of d_nz,o_nz are: 2543 .vb 2544 proc0 : dnz = 2, o_nz = 2 2545 proc1 : dnz = 3, o_nz = 2 2546 proc2 : dnz = 1, o_nz = 4 2547 .ve 2548 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2549 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2550 for proc3. i.e we are using 12+15+10=37 storage locations to store 2551 34 values. 2552 2553 When d_nnz, o_nnz parameters are specified, the storage is specified 2554 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2555 In the above case the values for d_nnz,o_nnz are: 2556 .vb 2557 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2558 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2559 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2560 .ve 2561 Here the space allocated is sum of all the above values i.e 34, and 2562 hence pre-allocation is perfect. 2563 2564 Level: intermediate 2565 2566 .keywords: matrix, aij, compressed row, sparse, parallel 2567 2568 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2569 MPIAIJ 2570 @*/ 2571 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2572 { 2573 PetscErrorCode ierr; 2574 PetscMPIInt size; 2575 2576 PetscFunctionBegin; 2577 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2578 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2579 if (size > 1) { 2580 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2581 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2582 } else { 2583 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2584 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2585 } 2586 PetscFunctionReturn(0); 2587 } 2588 2589 #undef __FUNCT__ 2590 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2591 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2592 { 2593 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2594 2595 PetscFunctionBegin; 2596 *Ad = a->A; 2597 *Ao = a->B; 2598 *colmap = a->garray; 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2604 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2605 { 2606 PetscErrorCode ierr; 2607 PetscInt i; 2608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2609 2610 PetscFunctionBegin; 2611 if (coloring->ctype == IS_COLORING_LOCAL) { 2612 ISColoringValue *allcolors,*colors; 2613 ISColoring ocoloring; 2614 2615 /* set coloring for diagonal portion */ 2616 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2617 2618 /* set coloring for off-diagonal portion */ 2619 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2620 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2621 for (i=0; i<a->B->n; i++) { 2622 colors[i] = allcolors[a->garray[i]]; 2623 } 2624 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2625 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2626 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2627 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2628 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2629 ISColoringValue *colors; 2630 PetscInt *larray; 2631 ISColoring ocoloring; 2632 2633 /* set coloring for diagonal portion */ 2634 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2635 for (i=0; i<a->A->n; i++) { 2636 larray[i] = i + 
a->cstart; 2637 } 2638 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2639 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2640 for (i=0; i<a->A->n; i++) { 2641 colors[i] = coloring->colors[larray[i]]; 2642 } 2643 ierr = PetscFree(larray);CHKERRQ(ierr); 2644 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2645 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2646 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2647 2648 /* set coloring for off-diagonal portion */ 2649 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2650 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2651 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2652 for (i=0; i<a->B->n; i++) { 2653 colors[i] = coloring->colors[larray[i]]; 2654 } 2655 ierr = PetscFree(larray);CHKERRQ(ierr); 2656 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2657 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2658 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2659 } else { 2660 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2661 } 2662 2663 PetscFunctionReturn(0); 2664 } 2665 2666 #if defined(PETSC_HAVE_ADIC) 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2669 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2670 { 2671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2672 PetscErrorCode ierr; 2673 2674 PetscFunctionBegin; 2675 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2676 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2677 PetscFunctionReturn(0); 2678 } 2679 #endif 2680 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2683 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2684 { 2685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2686 PetscErrorCode ierr; 2687 2688 PetscFunctionBegin; 2689 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2690 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatMerge" 2696 /*@C 2697 MatMerge - Creates a single large PETSc matrix by concatenating the sequential 2698 matrices from each processor 2699 2700 Collective on MPI_Comm 2701 2702 Input Parameters: 2703 + comm - the communicator the parallel matrix will live on 2704 . inmat - the input sequential matrix on each processor 2705 . n - number of local columns (or PETSC_DECIDE) 2706 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2707 2708 Output Parameter: 2709 . outmat - the parallel matrix generated 2710 2711 Level: advanced 2712 2713 Notes: The number of columns of the matrix in EACH processor MUST be the same.
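   Example usage, as a sketch (localmat stands for the sequential matrix assembled on each
   process; it is destroyed by this routine, see the MatDestroy() call in the code below,
   so the caller should not destroy it again):
.vb
      Mat globalmat;
      ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve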
2714 2715 @*/ 2716 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2717 { 2718 PetscErrorCode ierr; 2719 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2720 PetscInt *indx; 2721 PetscScalar *values; 2722 PetscMap columnmap,rowmap; 2723 2724 PetscFunctionBegin; 2725 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2726 /* 2727 PetscMPIInt rank; 2728 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2729 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2730 */ 2731 if (scall == MAT_INITIAL_MATRIX){ 2732 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2733 if (n == PETSC_DECIDE){ 2734 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2735 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2736 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2737 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2738 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2739 } 2740 2741 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2742 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2743 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2744 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2745 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2746 2747 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2748 for (i=0;i<m;i++) { 2749 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2750 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2751 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2752 } 2753 /* This routine will ONLY return MPIAIJ type matrix */ 2754 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2755 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2756 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2757 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2758 2759 } else if (scall == MAT_REUSE_MATRIX){ 2760 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2761 } else { 2762 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2763 } 2764 2765 for (i=0;i<m;i++) { 2766 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2767 I = i + rstart; 2768 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2769 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2770 } 2771 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2772 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2773 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2774 2775 PetscFunctionReturn(0); 2776 } 2777 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatFileSplit" 2780 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2781 { 2782 PetscErrorCode ierr; 2783 PetscMPIInt rank; 2784 PetscInt m,N,i,rstart,nnz; 2785 size_t len; 2786 const PetscInt *indx; 2787 PetscViewer out; 2788 char *name; 2789 Mat B; 2790 const PetscScalar *values; 2791 2792 PetscFunctionBegin; 2793 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2794 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2795 /* Should this be the type of the diagonal block of A? 
*/ 2796 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2797 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2798 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2799 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2800 for (i=0;i<m;i++) { 2801 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2802 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2803 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2804 } 2805 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2806 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2807 2808 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2809 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2810 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2811 sprintf(name,"%s.%d",outfile,rank); 2812 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2813 ierr = PetscFree(name);CHKERRQ(ierr); 2814 ierr = MatView(B,out);CHKERRQ(ierr); 2815 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2816 ierr = MatDestroy(B);CHKERRQ(ierr); 2817 PetscFunctionReturn(0); 2818 } 2819 2820 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2821 #undef __FUNCT__ 2822 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2823 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2824 { 2825 PetscErrorCode ierr; 2826 Mat_Merge_SeqsToMPI *merge; 2827 PetscObjectContainer container; 2828 2829 PetscFunctionBegin; 2830 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2831 if (container) { 2832 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2833 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2834 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2835 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2836 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2837 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2838 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2839 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2840 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2841 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2842 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2843 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2844 2845 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2846 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2847 /* merge is only valid when the container was found, so free it here rather than unconditionally below */ 2848 ierr = PetscFree(merge);CHKERRQ(ierr); 2849 } 2850 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #include "src/mat/utils/freespace.h" 2855 #include "petscbt.h" 2856 #undef __FUNCT__ 2857 #define __FUNCT__ "MatMerge_SeqsToMPI" 2858 /*@C 2859 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential 2860 matrices from each processor 2861 2862 Collective on MPI_Comm 2863 2864 Input Parameters: 2865 + comm - the communicator the parallel matrix will live on 2866 . seqmat - the input sequential matrix 2867 . m - number of local rows (or PETSC_DECIDE) 2868 . n - number of local columns (or PETSC_DECIDE) 2869 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2870 2871 Output Parameter: 2872 . mpimat - the parallel matrix generated 2873 2874 Level: advanced 2875 2876 Notes: 2877 The dimensions of the sequential matrix in each processor MUST be the same. 2878 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 2879 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
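   Example usage, as a sketch: the first call creates mpimat; later calls may reuse its
   structure when only the numerical values of seqmat have changed:
.vb
      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... change the values (but not the nonzero pattern) of seqmat ...
      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve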
2880 @*/ 2881 static PetscEvent logkey_seqstompinum = 0; 2882 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2883 { 2884 PetscErrorCode ierr; 2885 MPI_Comm comm=mpimat->comm; 2886 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2887 PetscMPIInt size,rank,taga,*len_s; 2888 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2889 PetscInt proc,m; 2890 PetscInt **buf_ri,**buf_rj; 2891 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2892 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2893 MPI_Request *s_waits,*r_waits; 2894 MPI_Status *status; 2895 MatScalar *aa=a->a,**abuf_r,*ba_i; 2896 Mat_Merge_SeqsToMPI *merge; 2897 PetscObjectContainer container; 2898 2899 PetscFunctionBegin; 2900 if (!logkey_seqstompinum) { 2901 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2902 } 2903 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2904 2905 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2906 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2907 2908 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2909 if (container) { 2910 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2911 } 2912 bi = merge->bi; 2913 bj = merge->bj; 2914 buf_ri = merge->buf_ri; 2915 buf_rj = merge->buf_rj; 2916 2917 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2918 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2919 len_s = merge->len_s; 2920 2921 /* send and recv matrix values */ 2922 /*-----------------------------*/ 2923 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2924 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2925 2926 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2927 for (proc=0,k=0; proc<size; proc++){ 2928 if (!len_s[proc]) continue; 2929 i = owners[proc]; 2930 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2931 k++; 2932 } 2933 2934 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2935 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2936 ierr = PetscFree(status);CHKERRQ(ierr); 2937 2938 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2939 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2940 2941 /* insert mat values of mpimat */ 2942 /*----------------------------*/ 2943 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2944 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2945 nextrow = buf_ri_k + merge->nrecv; 2946 nextai = nextrow + merge->nrecv; 2947 2948 for (k=0; k<merge->nrecv; k++){ 2949 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2950 nrows = *(buf_ri_k[k]); 2951 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2952 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2953 } 2954 2955 /* set values of ba */ 2956 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2957 for (i=0; i<m; i++) { 2958 arow = owners[rank] + i; 2959 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2960 bnzi = bi[i+1] - bi[i]; 2961 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2962 2963 /* add local non-zero vals of this proc's seqmat into ba */ 2964 anzi = ai[arow+1] - ai[arow]; 2965 aj = a->j + ai[arow]; 2966 aa = a->a + ai[arow]; 2967 nextaj = 0; 2968 for 
(j=0; nextaj<anzi; j++){ 2969 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2970 ba_i[j] += aa[nextaj++]; 2971 } 2972 } 2973 2974 /* add received vals into ba */ 2975 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2976 /* i-th row */ 2977 if (i == *nextrow[k]) { 2978 anzi = *(nextai[k]+1) - *nextai[k]; 2979 aj = buf_rj[k] + *(nextai[k]); 2980 aa = abuf_r[k] + *(nextai[k]); 2981 nextaj = 0; 2982 for (j=0; nextaj<anzi; j++){ 2983 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2984 ba_i[j] += aa[nextaj++]; 2985 } 2986 } 2987 nextrow[k]++; nextai[k]++; 2988 } 2989 } 2990 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2991 } 2992 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2993 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 2995 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2996 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2997 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2998 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2999 PetscFunctionReturn(0); 3000 } 3001 static PetscEvent logkey_seqstompisym = 0; 3002 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3003 { 3004 PetscErrorCode ierr; 3005 Mat B_mpi; 3006 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3007 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3008 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3009 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3010 PetscInt len,proc,*dnz,*onz; 3011 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3012 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3013 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3014 MPI_Status *status; 3015 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3016 PetscBT lnkbt; 3017 Mat_Merge_SeqsToMPI *merge; 3018 PetscObjectContainer container; 3019 3020 PetscFunctionBegin; 3021 if (!logkey_seqstompisym) { 3022 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3023 } 3024 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3025 3026 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3027 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3028 3029 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3030 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3031 3032 /* determine row ownership */ 3033 /*---------------------------------------------------------*/ 3034 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3035 if (m == PETSC_DECIDE) { 3036 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3037 } else { 3038 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3039 } 3040 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3041 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3042 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3043 3044 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3045 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3046 3047 /* determine the number of messages to send, their lengths */ 3048 /*---------------------------------------------------------*/ 3049 len_s = merge->len_s; 3050 3051 len = 0; /* length of buf_si[] */ 3052 merge->nsend = 0; 3053 for (proc=0; proc<size; proc++){ 3054 len_si[proc] = 0; 3055 if (proc == rank){ 3056 len_s[proc] = 0; 3057 } else { 3058 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3059 
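/* first guess at the length of the i-structure message for [proc] (rows owned by [proc]
         plus one); it is tightened below to 2*(number of nonempty rows + 1) once the nonempty
         rows to be sent have been counted */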
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3060 } 3061 if (len_s[proc]) { 3062 merge->nsend++; 3063 nrows = 0; 3064 for (i=owners[proc]; i<owners[proc+1]; i++){ 3065 if (ai[i+1] > ai[i]) nrows++; 3066 } 3067 len_si[proc] = 2*(nrows+1); 3068 len += len_si[proc]; 3069 } 3070 } 3071 3072 /* determine the number and length of messages to receive for ij-structure */ 3073 /*-------------------------------------------------------------------------*/ 3074 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3075 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3076 3077 /* post the Irecv of j-structure */ 3078 /*-------------------------------*/ 3079 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3080 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3081 3082 /* post the Isend of j-structure */ 3083 /*--------------------------------*/ 3084 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3085 sj_waits = si_waits + merge->nsend; 3086 3087 for (proc=0, k=0; proc<size; proc++){ 3088 if (!len_s[proc]) continue; 3089 i = owners[proc]; 3090 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3091 k++; 3092 } 3093 3094 /* receives and sends of j-structure are complete */ 3095 /*------------------------------------------------*/ 3096 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3097 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3098 3099 /* send and recv i-structure */ 3100 /*---------------------------*/ 3101 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3102 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3103 3104 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3105 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3106 for (proc=0,k=0; proc<size; proc++){ 3107 if (!len_s[proc]) continue; 3108 /* form outgoing message for i-structure: 3109 buf_si[0]: nrows to be sent 3110 [1:nrows]: row index (global) 3111 [nrows+1:2*nrows+1]: i-structure index 3112 */ 3113 /*-------------------------------------------*/ 3114 nrows = len_si[proc]/2 - 1; 3115 buf_si_i = buf_si + nrows+1; 3116 buf_si[0] = nrows; 3117 buf_si_i[0] = 0; 3118 nrows = 0; 3119 for (i=owners[proc]; i<owners[proc+1]; i++){ 3120 anzi = ai[i+1] - ai[i]; 3121 if (anzi) { 3122 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3123 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3124 nrows++; 3125 } 3126 } 3127 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3128 k++; 3129 buf_si += len_si[proc]; 3130 } 3131 3132 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3133 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3134 3135 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3136 for (i=0; i<merge->nrecv; i++){ 3137 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3138 } 3139 3140 ierr = PetscFree(len_si);CHKERRQ(ierr); 3141 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3142 ierr = 
PetscFree(rj_waits);CHKERRQ(ierr); 3143 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3144 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3145 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3146 ierr = PetscFree(status);CHKERRQ(ierr); 3147 3148 /* compute a local seq matrix in each processor */ 3149 /*----------------------------------------------*/ 3150 /* allocate bi array and free space for accumulating nonzero column info */ 3151 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3152 bi[0] = 0; 3153 3154 /* create and initialize a linked list */ 3155 nlnk = N+1; 3156 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3157 3158 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3159 len = 0; 3160 len = ai[owners[rank+1]] - ai[owners[rank]]; 3161 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3162 current_space = free_space; 3163 3164 /* determine symbolic info for each local row */ 3165 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3166 nextrow = buf_ri_k + merge->nrecv; 3167 nextai = nextrow + merge->nrecv; 3168 for (k=0; k<merge->nrecv; k++){ 3169 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3170 nrows = *buf_ri_k[k]; 3171 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3172 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3173 } 3174 3175 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3176 len = 0; 3177 for (i=0;i<m;i++) { 3178 bnzi = 0; 3179 /* add local non-zero cols of this proc's seqmat into lnk */ 3180 arow = owners[rank] + i; 3181 anzi = ai[arow+1] - ai[arow]; 3182 aj = a->j + ai[arow]; 3183 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3184 bnzi += nlnk; 3185 /* add received col data into lnk */ 3186 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3187 if (i == *nextrow[k]) { /* i-th row */ 3188 anzi = *(nextai[k]+1) - *nextai[k]; 3189 aj = buf_rj[k] + *nextai[k]; 3190 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3191 bnzi += nlnk; 3192 nextrow[k]++; nextai[k]++; 3193 } 3194 } 3195 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3196 3197 /* if free space is not available, make more free space */ 3198 if (current_space->local_remaining<bnzi) { 3199 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3200 nspacedouble++; 3201 } 3202 /* copy data into free space, then initialize lnk */ 3203 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3204 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3205 3206 current_space->array += bnzi; 3207 current_space->local_used += bnzi; 3208 current_space->local_remaining -= bnzi; 3209 3210 bi[i+1] = bi[i] + bnzi; 3211 } 3212 3213 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3214 3215 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3216 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3217 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3218 3219 /* create symbolic parallel matrix B_mpi */ 3220 /*---------------------------------------*/ 3221 if (n==PETSC_DECIDE) { 3222 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3223 } else { 3224 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3225 } 3226 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3227 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3228 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3229 3230 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3231 B_mpi->assembled = PETSC_FALSE; 3232 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3233 merge->bi = bi; 3234 merge->bj = bj; 3235 merge->buf_ri = buf_ri; 3236 merge->buf_rj = buf_rj; 3237 merge->coi = PETSC_NULL; 3238 merge->coj = PETSC_NULL; 3239 merge->owners_co = PETSC_NULL; 3240 3241 /* attach the supporting struct to B_mpi for reuse */ 3242 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3243 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3244 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3245 *mpimat = B_mpi; 3246 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 static PetscEvent logkey_seqstompi = 0; 3251 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3252 { 3253 PetscErrorCode ierr; 3254 3255 PetscFunctionBegin; 3256 if (!logkey_seqstompi) { 3257 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3258 } 3259 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3260 if (scall == MAT_INITIAL_MATRIX){ 3261 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3262 } 3263 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3264 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 static PetscEvent logkey_getlocalmat = 0; 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "MatGetLocalMat" 3270 /*@C 3271 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3272 3273 Not Collective 3274 3275 Input Parameters: 3276 + A - the matrix 3277 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3278 3279 Output Parameter: 3280 . 
A_loc - the local sequential matrix generated 3281 3282 Level: developer 3283 3284 @*/ 3285 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3286 { 3287 PetscErrorCode ierr; 3288 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3289 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3290 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3291 PetscScalar *aa=a->a,*ba=b->a,*ca; 3292 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3293 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3294 3295 PetscFunctionBegin; 3296 if (!logkey_getlocalmat) { 3297 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3298 } 3299 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3300 if (scall == MAT_INITIAL_MATRIX){ 3301 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3302 ci[0] = 0; 3303 for (i=0; i<am; i++){ 3304 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3305 } 3306 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3307 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3308 k = 0; 3309 for (i=0; i<am; i++) { 3310 ncols_o = bi[i+1] - bi[i]; 3311 ncols_d = ai[i+1] - ai[i]; 3312 /* off-diagonal portion of A */ 3313 for (jo=0; jo<ncols_o; jo++) { 3314 col = cmap[*bj]; 3315 if (col >= cstart) break; 3316 cj[k] = col; bj++; 3317 ca[k++] = *ba++; 3318 } 3319 /* diagonal portion of A */ 3320 for (j=0; j<ncols_d; j++) { 3321 cj[k] = cstart + *aj++; 3322 ca[k++] = *aa++; 3323 } 3324 /* off-diagonal portion of A */ 3325 for (j=jo; j<ncols_o; j++) { 3326 cj[k] = cmap[*bj++]; 3327 ca[k++] = *ba++; 3328 } 3329 } 3330 /* put together the new matrix */ 3331 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3332 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3333 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3334 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3335 mat->freedata = PETSC_TRUE; 3336 mat->nonew = 0; 3337 } else if (scall == MAT_REUSE_MATRIX){ 3338 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3339 ci = mat->i; cj = mat->j; ca = mat->a; 3340 for (i=0; i<am; i++) { 3341 /* off-diagonal portion of A */ 3342 ncols_o = bi[i+1] - bi[i]; 3343 for (jo=0; jo<ncols_o; jo++) { 3344 col = cmap[*bj]; 3345 if (col >= cstart) break; 3346 *ca++ = *ba++; bj++; 3347 } 3348 /* diagonal portion of A */ 3349 ncols_d = ai[i+1] - ai[i]; 3350 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3351 /* off-diagonal portion of A */ 3352 for (j=jo; j<ncols_o; j++) { 3353 *ca++ = *ba++; bj++; 3354 } 3355 } 3356 } else { 3357 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3358 } 3359 3360 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3361 PetscFunctionReturn(0); 3362 } 3363 3364 static PetscEvent logkey_getlocalmatcondensed = 0; 3365 #undef __FUNCT__ 3366 #define __FUNCT__ "MatGetLocalMatCondensed" 3367 /*@C 3368 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3369 3370 Not Collective 3371 3372 Input Parameters: 3373 + A - the matrix 3374 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3375 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3376 3377 Output Parameter: 3378 . 
A_loc - the local sequential matrix generated 3379 3380 Level: developer 3381 3382 @*/ 3383 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3384 { 3385 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3386 PetscErrorCode ierr; 3387 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3388 IS isrowa,iscola; 3389 Mat *aloc; 3390 3391 PetscFunctionBegin; 3392 if (!logkey_getlocalmatcondensed) { 3393 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3394 } 3395 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3396 if (!row){ 3397 start = a->rstart; end = a->rend; 3398 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3399 } else { 3400 isrowa = *row; 3401 } 3402 if (!col){ 3403 start = a->cstart; 3404 cmap = a->garray; 3405 nzA = a->A->n; 3406 nzB = a->B->n; 3407 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3408 ncols = 0; 3409 for (i=0; i<nzB; i++) { 3410 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3411 else break; 3412 } 3413 imark = i; 3414 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3415 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3416 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3417 ierr = PetscFree(idx);CHKERRQ(ierr); 3418 } else { 3419 iscola = *col; 3420 } 3421 if (scall != MAT_INITIAL_MATRIX){ 3422 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3423 aloc[0] = *A_loc; 3424 } 3425 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3426 *A_loc = aloc[0]; 3427 ierr = PetscFree(aloc);CHKERRQ(ierr); 3428 if (!row){ 3429 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3430 } 3431 if (!col){ 3432 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3433 } 3434 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3435 PetscFunctionReturn(0); 3436 } 3437 3438 static PetscEvent logkey_GetBrowsOfAcols = 0; 3439 #undef __FUNCT__ 3440 #define __FUNCT__ "MatGetBrowsOfAcols" 3441 /*@C 3442 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3443 3444 Collective on Mat 3445 3446 Input Parameters: 3447 + A,B - the matrices in mpiaij format 3448 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3449 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3450 3451 Output Parameter: 3452 + rowb, colb - index sets of rows and columns of B to extract 3453 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3454 - B_seq - the sequential matrix generated 3455 3456 Level: developer 3457 3458 @*/ 3459 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3460 { 3461 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3462 PetscErrorCode ierr; 3463 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3464 IS isrowb,iscolb; 3465 Mat *bseq; 3466 3467 PetscFunctionBegin; 3468 if (a->cstart != b->rstart || a->cend != b->rend){ 3469 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3470 } 3471 if (!logkey_GetBrowsOfAcols) { 3472 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3473 } 3474 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3475 3476 if (scall == MAT_INITIAL_MATRIX){ 3477 start = a->cstart; 3478 cmap = a->garray; 3479 nzA = a->A->n; 3480 nzB = a->B->n; 3481 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3482 ncols = 0; 3483 for (i=0; i<nzB; i++) { /* row < local row index */ 3484 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3485 else break; 3486 } 3487 imark = i; 3488 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3489 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3490 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3491 ierr = PetscFree(idx);CHKERRQ(ierr); 3492 *brstart = imark; 3493 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3494 } else { 3495 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3496 isrowb = *rowb; iscolb = *colb; 3497 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3498 bseq[0] = *B_seq; 3499 } 3500 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3501 *B_seq = bseq[0]; 3502 ierr = PetscFree(bseq);CHKERRQ(ierr); 3503 if (!rowb){ 3504 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3505 } else { 3506 *rowb = isrowb; 3507 } 3508 if (!colb){ 3509 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3510 } else { 3511 *colb = iscolb; 3512 } 3513 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3514 PetscFunctionReturn(0); 3515 } 3516 3517 static PetscEvent logkey_GetBrowsOfAocols = 0; 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatGetBrowsOfAoCols" 3520 /*@C 3521 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3522 of the OFF-DIAGONAL portion of local A 3523 3524 Collective on Mat 3525 3526 Input Parameters: 3527 + A,B - the matrices in mpiaij format 3528 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3529 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3530 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3531 3532 Output Parameter: 3533 . B_oth - the sequential matrix generated 3534 3535 Level: developer 3536 3537 @*/ 3538 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3539 { 3540 VecScatter_MPI_General *gen_to,*gen_from; 3541 PetscErrorCode ierr; 3542 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3543 Mat_SeqAIJ *b_oth; 3544 VecScatter ctx=a->Mvctx; 3545 MPI_Comm comm=ctx->comm; 3546 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3547 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3548 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3549 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3550 MPI_Request *rwaits,*swaits; 3551 MPI_Status *sstatus,rstatus; 3552 PetscInt *cols; 3553 PetscScalar *vals; 3554 PetscMPIInt j; 3555 3556 PetscFunctionBegin; 3557 if (a->cstart != b->rstart || a->cend != b->rend){ 3558 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3559 } 3560 if (!logkey_GetBrowsOfAocols) { 3561 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3562 } 3563 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3564 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3565 3566 gen_to = (VecScatter_MPI_General*)ctx->todata; 3567 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3568 rvalues = gen_from->values; /* holds the lengths of the rows to be received */ 3569 svalues = gen_to->values; /* holds the lengths of the rows to be sent */ 3570 nrecvs = gen_from->n; 3571 nsends = gen_to->n; 3572 rwaits = gen_from->requests; 3573 swaits = gen_to->requests; 3574 srow = gen_to->indices; /* local row index to be sent */ 3575 rstarts = gen_from->starts; 3576 sstarts = gen_to->starts; 3577 rprocs = gen_from->procs; 3578 sprocs = gen_to->procs; 3579 sstatus = gen_to->sstatus; 3580 3581 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3582 if (scall == MAT_INITIAL_MATRIX){ 3583 /* i-array */ 3584 /*---------*/ 3585 /* post receives */ 3586 for (i=0; i<nrecvs; i++){ 3587 rowlen = (PetscInt*)rvalues + rstarts[i]; 3588 nrows = rstarts[i+1]-rstarts[i]; 3589 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3590 } 3591 3592 /* pack the outgoing message */ 3593 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3594 rstartsj = sstartsj + nsends +1; 3595 sstartsj[0] = 0; rstartsj[0] = 0; 3596 len = 0; /* total length of j or a array to be sent */ 3597 k = 0; 3598 for (i=0; i<nsends; i++){ 3599 rowlen = (PetscInt*)svalues + sstarts[i]; 3600 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3601 for (j=0; j<nrows; j++) { 3602 row = srow[k] + b->rowners[rank]; /* global row idx */ 3603 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3604 len += rowlen[j]; 3605 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3606 k++; 3607 } 3608 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3609 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3610 } 3611 /* recvs and sends of i-array are completed */ 3612 i = nrecvs; 3613 while (i--) { 3614 ierr =
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3615 } 3616 if (nsends) { 3617 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3618 } 3619 /* allocate buffers for sending j and a arrays */ 3620 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3621 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3622 3623 /* create i-array of B_oth */ 3624 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3625 b_othi[0] = 0; 3626 len = 0; /* total length of j or a array to be received */ 3627 k = 0; 3628 for (i=0; i<nrecvs; i++){ 3629 rowlen = (PetscInt*)rvalues + rstarts[i]; 3630 nrows = rstarts[i+1]-rstarts[i]; 3631 for (j=0; j<nrows; j++) { 3632 b_othi[k+1] = b_othi[k] + rowlen[j]; 3633 len += rowlen[j]; k++; 3634 } 3635 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3636 } 3637 3638 /* allocate space for j and a arrays of B_oth */ 3639 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3640 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3641 3642 /* j-array */ 3643 /*---------*/ 3644 /* post receives of j-array */ 3645 for (i=0; i<nrecvs; i++){ 3646 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3647 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3648 } 3649 k = 0; 3650 for (i=0; i<nsends; i++){ 3651 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3652 bufJ = bufj+sstartsj[i]; 3653 for (j=0; j<nrows; j++) { 3654 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3655 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3656 for (l=0; l<ncols; l++){ 3657 *bufJ++ = cols[l]; 3658 } 3659 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3660 } 3661 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3662 } 3663 3664 /* recvs and sends of j-array are completed */ 3665 i = nrecvs; 3666 while (i--) { 3667 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3668 } 3669 if (nsends) { 3670 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3671 } 3672 } else if (scall == MAT_REUSE_MATRIX){ 3673 sstartsj = *startsj; 3674 rstartsj = sstartsj + nsends +1; 3675 bufa = *bufa_ptr; 3676 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3677 b_otha = b_oth->a; 3678 } else { 3679 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3680 } 3681 3682 /* a-array */ 3683 /*---------*/ 3684 /* post receives of a-array */ 3685 for (i=0; i<nrecvs; i++){ 3686 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3687 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3688 } 3689 k = 0; 3690 for (i=0; i<nsends; i++){ 3691 nrows = sstarts[i+1]-sstarts[i]; 3692 bufA = bufa+sstartsj[i]; 3693 for (j=0; j<nrows; j++) { 3694 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3695 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3696 for (l=0; l<ncols; l++){ 3697 *bufA++ = vals[l]; 3698 } 3699 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3700 3701 } 3702 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3703 } 3704 /* recvs and sends of a-array are completed */ 3705 i = nrecvs; 3706 while (i--) { 3707 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3708 } 3709 if (nsends) {
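/* the outgoing a-array sends must have completed before bufa may be returned to the caller or freed below */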
3710 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3711 } 3712 3713 if (scall == MAT_INITIAL_MATRIX){ 3714 /* put together the new matrix */ 3715 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3716 3717 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3718 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3719 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3720 b_oth->freedata = PETSC_TRUE; 3721 b_oth->nonew = 0; 3722 3723 ierr = PetscFree(bufj);CHKERRQ(ierr); 3724 if (!startsj || !bufa_ptr){ 3725 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3726 ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not handed back to the caller in this case, so release it here */ 3727 } else { 3728 *startsj = sstartsj; 3729 *bufa_ptr = bufa; 3730 } 3731 } 3732 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3733 3734 PetscFunctionReturn(0); 3735 } 3736 3737 /*MC 3738 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3739 3740 Options Database Keys: 3741 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3742 3743 Level: beginner 3744 3745 .seealso: MatCreateMPIAIJ() 3746 M*/ 3747 3748 EXTERN_C_BEGIN 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatCreate_MPIAIJ" 3751 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3752 { 3753 Mat_MPIAIJ *b; 3754 PetscErrorCode ierr; 3755 PetscInt i; 3756 PetscMPIInt size; 3757 3758 PetscFunctionBegin; 3759 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3760 3761 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3762 B->data = (void*)b; 3763 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3764 B->factor = 0; 3765 B->bs = 1; 3766 B->assembled = PETSC_FALSE; 3767 B->mapping = 0; 3768 3769 B->insertmode = NOT_SET_VALUES; 3770 b->size = size; 3771 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3772 3773 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3774 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3775 3776 /* the information in the maps duplicates the information computed below, eventually 3777 we should remove the duplicate information that is not contained in the maps */ 3778 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3779 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3780 3781 /* build local table of row and column ownerships */ 3782 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3783 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3784 b->cowners = b->rowners + b->size + 2; 3785 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3786 b->rowners[0] = 0; 3787 for (i=2; i<=b->size; i++) { 3788 b->rowners[i] += b->rowners[i-1]; 3789 } 3790 b->rstart = b->rowners[b->rank]; 3791 b->rend = b->rowners[b->rank+1]; 3792 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3793 b->cowners[0] = 0; 3794 for (i=2; i<=b->size; i++) { 3795 b->cowners[i] += b->cowners[i-1]; 3796 } 3797 b->cstart = b->cowners[b->rank]; 3798 b->cend = b->cowners[b->rank+1]; 3799 3800 /* build cache for off array entries formed */ 3801 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3802 b->donotstash = PETSC_FALSE; 3803 b->colmap = 0; 3804 b->garray = 0; 3805 b->roworiented = PETSC_TRUE; 3806 3807 /* stuff used for matrix vector multiply */ 3808 b->lvec = PETSC_NULL; 3809 b->Mvctx = PETSC_NULL;
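/* lvec will hold the off-process (ghost) entries of the input vector needed by MatMult(); Mvctx is the
   VecScatter that gathers them. Both are created later, once the matrix is assembled and the off-diagonal
   column map garray is known (see MatSetUpMultiply_MPIAIJ()). */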
3810 3811 /* stuff for MatGetRow() */ 3812 b->rowindices = 0; 3813 b->rowvalues = 0; 3814 b->getrowactive = PETSC_FALSE; 3815 3816 /* Explicitly create 2 MATSEQAIJ matrices. */ 3817 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3818 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3819 PetscLogObjectParent(B,b->A); 3820 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3821 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3822 PetscLogObjectParent(B,b->B); 3823 3824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3825 "MatStoreValues_MPIAIJ", 3826 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3828 "MatRetrieveValues_MPIAIJ", 3829 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3830 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3831 "MatGetDiagonalBlock_MPIAIJ", 3832 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3833 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3834 "MatIsTranspose_MPIAIJ", 3835 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3837 "MatMPIAIJSetPreallocation_MPIAIJ", 3838 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3839 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3840 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3841 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3842 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3843 "MatDiagonalScaleLocal_MPIAIJ", 3844 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3845 PetscFunctionReturn(0); 3846 } 3847 EXTERN_C_END 3848
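
/*
   Usage sketch (added for orientation; not part of the original implementation). The sizes,
   indices, and preallocation numbers below are illustrative assumptions only. It shows the
   typical way a MATMPIAIJ matrix is created with the interfaces registered above, and how
   MatMerge_SeqsToMPI() is reused once the symbolic data has been attached to the result.

   Creating and assembling an MPIAIJ matrix:

       Mat            A;
       PetscInt       row = ..., col = ...;      (owned global indices; placeholders)
       PetscScalar    value = 1.0;
       PetscErrorCode ierr;

       ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Reusing MatMerge_SeqsToMPI() when only the numerical values of seqmat change:

       ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
       ... update the values of seqmat, keeping the same nonzero pattern ...
       ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numeric phase runs; it relies on the "MatMergeSeqsToMPI"
   container attached to mpimat by the symbolic phase.
*/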