1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (PetscInt*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(PetscInt));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (PetscInt*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(PetscInt));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 278 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 PetscMPIInt n; 351 PetscInt i,j,rstart,ncols,flg; 352 PetscInt *row,*col,other_disassembled; 353 PetscScalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for (i=0; i<n;) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. */ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 /* reaccess the b because aij->B was changed */ 389 b = (Mat_SeqAIJ *)aij->B->data; 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 404 /* used by MatAXPY() */ 405 a->xtoy = 0; b->xtoy = 0; 406 a->XtoY = 0; b->XtoY = 0; 407 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 414 { 415 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroRows_MPIAIJ" 426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 PetscErrorCode ierr; 430 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 431 PetscInt i,N,*rows,*owners = l->rowners; 432 PetscInt *nprocs,j,idx,nsends,row; 433 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 434 PetscInt *rvalues,count,base,slen,*source; 435 PetscInt *lens,*lrows,*values,rstart=l->rstart; 436 MPI_Comm comm = A->comm; 437 MPI_Request *send_waits,*recv_waits; 438 MPI_Status recv_status,*send_status; 439 IS istmp; 440 PetscTruth found; 441 442 PetscFunctionBegin; 443 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 445 446 /* first count number of contributors to each processor */ 447 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 448 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 450 for (i=0; i<N; i++) { 451 idx = rows[i]; 452 found = PETSC_FALSE; 453 for (j=0; j<size; j++) { 454 if (idx >= owners[j] && idx < owners[j+1]) { 455 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 456 } 457 } 458 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 459 } 460 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 461 462 /* inform other processors of number of messages and max length*/ 463 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 464 465 /* post receives: */ 466 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 468 for (i=0; i<nrecvs; i++) { 469 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 470 } 471 472 /* do sends: 473 1) starts[i] gives the starting index in svalues for stuff going to 474 the ith processor 475 */ 476 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 477 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 478 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 479 starts[0] = 0; 480 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 481 for (i=0; i<N; i++) { 482 svalues[starts[owner[i]]++] = rows[i]; 483 } 484 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 485 486 starts[0] = 0; 487 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 488 count = 0; 489 for (i=0; i<size; i++) { 490 if (nprocs[2*i+1]) { 491 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 492 } 493 } 494 ierr = PetscFree(starts);CHKERRQ(ierr); 495 496 base = owners[rank]; 497 498 /* wait on receives */ 499 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 500 source = lens + nrecvs; 501 count = nrecvs; slen = 0; 502 while (count) { 503 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 504 /* unpack receives into our local space */ 505 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 506 source[imdex] = recv_status.MPI_SOURCE; 507 lens[imdex] = n; 508 slen += n; 509 count--; 510 } 511 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 512 513 /* move the data into the send scatter */ 514 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 515 count = 0; 516 for (i=0; i<nrecvs; i++) { 517 values = rvalues + i*nmax; 518 for (j=0; j<lens[i]; j++) { 519 lrows[count++] = values[j] - base; 520 } 521 } 522 ierr = PetscFree(rvalues);CHKERRQ(ierr); 523 ierr = PetscFree(lens);CHKERRQ(ierr); 524 ierr = PetscFree(owner);CHKERRQ(ierr); 525 ierr = PetscFree(nprocs);CHKERRQ(ierr); 526 527 /* actually zap the local rows */ 528 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 529 PetscLogObjectParent(A,istmp); 530 531 /* 532 Zero the required rows. If the "diagonal block" of the matrix 533 is square and the user wishes to set the diagonal we use seperate 534 code so that MatSetValues() is not called for each diagonal allocating 535 new memory, thus calling lots of mallocs and slowing things down. 536 537 Contributed by: Mathew Knepley 538 */ 539 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 540 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 541 if (diag && (l->A->M == l->A->N)) { 542 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 543 } else if (diag) { 544 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 545 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 546 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 548 } 549 for (i = 0; i < slen; i++) { 550 row = lrows[i] + rstart; 551 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } else { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 } 558 ierr = ISDestroy(istmp);CHKERRQ(ierr); 559 ierr = PetscFree(lrows);CHKERRQ(ierr); 560 561 /* wait on sends */ 562 if (nsends) { 563 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 564 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 565 ierr = PetscFree(send_status);CHKERRQ(ierr); 566 } 567 ierr = PetscFree(send_waits);CHKERRQ(ierr); 568 ierr = PetscFree(svalues);CHKERRQ(ierr); 569 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatMult_MPIAIJ" 575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 576 { 577 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 578 PetscErrorCode ierr; 579 PetscInt nt; 580 581 PetscFunctionBegin; 582 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 583 if (nt != A->n) { 584 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 585 } 586 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 588 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 589 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultAdd_MPIAIJ" 595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 /* do nondiagonal part */ 617 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 618 /* send it on its way */ 619 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 620 /* do local part */ 621 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 622 /* receive remote parts: note this assumes the values are not actually */ 623 /* inserted in yy until the next line, which is true for my implementation*/ 624 /* but is not perhaps always true. */ 625 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 EXTERN_C_BEGIN 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 633 { 634 MPI_Comm comm; 635 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 636 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 637 IS Me,Notme; 638 PetscErrorCode ierr; 639 PetscInt M,N,first,last,*notme,i; 640 PetscMPIInt size; 641 642 PetscFunctionBegin; 643 644 /* Easy test: symmetric diagonal block */ 645 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 646 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 647 if (!*f) PetscFunctionReturn(0); 648 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 649 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 650 if (size == 1) PetscFunctionReturn(0); 651 652 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 653 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 654 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 655 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 656 for (i=0; i<first; i++) notme[i] = i; 657 for (i=last; i<M; i++) notme[i-last+first] = i; 658 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 659 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 660 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 661 Aoff = Aoffs[0]; 662 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 663 Boff = Boffs[0]; 664 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 665 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 666 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 667 ierr = ISDestroy(Me);CHKERRQ(ierr); 668 ierr = ISDestroy(Notme);CHKERRQ(ierr); 669 670 PetscFunctionReturn(0); 671 } 672 EXTERN_C_END 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 PetscErrorCode ierr; 680 681 PetscFunctionBegin; 682 /* do nondiagonal part */ 683 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 684 /* send it on its way */ 685 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 686 /* do local part */ 687 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 688 /* receive remote parts: note this assumes the values are not actually */ 689 /* inserted in yy until the next line, which is true for my implementation*/ 690 /* but is not perhaps always true. */ 691 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 /* 696 This only works correctly for square matrices where the subblock A->A is the 697 diagonal block 698 */ 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 702 { 703 PetscErrorCode ierr; 704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 705 706 PetscFunctionBegin; 707 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 708 if (a->rstart != a->cstart || a->rend != a->cend) { 709 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 710 } 711 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatScale_MPIAIJ" 717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 722 PetscFunctionBegin; 723 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 724 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatDestroy_MPIAIJ" 730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 731 { 732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 #if defined(PETSC_USE_LOG) 737 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 738 #endif 739 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 740 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 741 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 742 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 743 #if defined (PETSC_USE_CTABLE) 744 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 745 #else 746 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 747 #endif 748 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 749 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 750 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 751 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 752 ierr = PetscFree(aij);CHKERRQ(ierr); 753 754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatView_MPIAIJ_Binary" 766 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 767 { 768 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 769 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 770 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 771 PetscErrorCode ierr; 772 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 773 int fd; 774 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 775 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 776 PetscScalar *column_values; 777 778 PetscFunctionBegin; 779 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 780 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 781 nz = A->nz + B->nz; 782 if (!rank) { 783 header[0] = MAT_FILE_COOKIE; 784 header[1] = mat->M; 785 header[2] = mat->N; 786 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 787 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 788 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 789 /* get largest number of rows any processor has */ 790 rlen = mat->m; 791 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 792 for (i=1; i<size; i++) { 793 rlen = PetscMax(rlen,range[i+1] - range[i]); 794 } 795 } else { 796 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 797 rlen = mat->m; 798 } 799 800 /* load up the local row counts */ 801 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 802 for (i=0; i<mat->m; i++) { 803 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 804 } 805 806 /* store the row lengths to the file */ 807 if (!rank) { 808 MPI_Status status; 809 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 810 for (i=1; i<size; i++) { 811 rlen = range[i+1] - range[i]; 812 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 813 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 814 } 815 } else { 816 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 817 } 818 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 819 820 /* load up the local column indices */ 821 nzmax = nz; /* )th processor needs space a largest processor needs */ 822 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 823 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 824 cnt = 0; 825 for (i=0; i<mat->m; i++) { 826 for (j=B->i[i]; j<B->i[i+1]; j++) { 827 if ( (col = garray[B->j[j]]) > cstart) break; 828 column_indices[cnt++] = col; 829 } 830 for (k=A->i[i]; k<A->i[i+1]; k++) { 831 column_indices[cnt++] = A->j[k] + cstart; 832 } 833 for (; j<B->i[i+1]; j++) { 834 column_indices[cnt++] = garray[B->j[j]]; 835 } 836 } 837 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 838 839 /* store the column indices to the file */ 840 if (!rank) { 841 MPI_Status status; 842 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 843 for (i=1; i<size; i++) { 844 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 845 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 846 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 847 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 848 } 849 } else { 850 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 851 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 852 } 853 ierr = PetscFree(column_indices);CHKERRQ(ierr); 854 855 /* load up the local column values */ 856 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 857 cnt = 0; 858 for (i=0; i<mat->m; i++) { 859 for (j=B->i[i]; j<B->i[i+1]; j++) { 860 if ( garray[B->j[j]] > cstart) break; 861 column_values[cnt++] = B->a[j]; 862 } 863 for (k=A->i[i]; k<A->i[i+1]; k++) { 864 column_values[cnt++] = A->a[k]; 865 } 866 for (; j<B->i[i+1]; j++) { 867 column_values[cnt++] = B->a[j]; 868 } 869 } 870 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 871 872 /* store the column values to the file */ 873 if (!rank) { 874 MPI_Status status; 875 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 876 for (i=1; i<size; i++) { 877 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 878 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 879 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 880 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 881 } 882 } else { 883 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 884 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 885 } 886 ierr = PetscFree(column_values);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 892 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 893 { 894 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 895 PetscErrorCode ierr; 896 PetscMPIInt rank = aij->rank,size = aij->size; 897 PetscTruth isdraw,iascii,flg,isbinary; 898 PetscViewer sviewer; 899 PetscViewerFormat format; 900 901 PetscFunctionBegin; 902 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 904 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 905 if (iascii) { 906 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 907 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 908 MatInfo info; 909 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 910 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 911 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 912 if (flg) { 913 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 914 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 915 } else { 916 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 917 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 918 } 919 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 920 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 921 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 922 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 923 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 924 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } else if (format == PETSC_VIEWER_ASCII_INFO) { 927 PetscFunctionReturn(0); 928 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 929 PetscFunctionReturn(0); 930 } 931 } else if (isbinary) { 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 937 } 938 PetscFunctionReturn(0); 939 } else if (isdraw) { 940 PetscDraw draw; 941 PetscTruth isnull; 942 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 943 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 944 } 945 946 if (size == 1) { 947 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 948 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 949 } else { 950 /* assemble the entire matrix onto first processor. */ 951 Mat A; 952 Mat_SeqAIJ *Aloc; 953 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 954 PetscScalar *a; 955 956 if (!rank) { 957 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 958 } else { 959 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 960 } 961 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 962 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 963 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 964 PetscLogObjectParent(mat,A); 965 966 /* copy over the A part */ 967 Aloc = (Mat_SeqAIJ*)aij->A->data; 968 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 969 row = aij->rstart; 970 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 974 } 975 aj = Aloc->j; 976 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 977 978 /* copy over the B part */ 979 Aloc = (Mat_SeqAIJ*)aij->B->data; 980 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 981 row = aij->rstart; 982 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 983 ct = cols; 984 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 985 for (i=0; i<m; i++) { 986 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 987 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 988 } 989 ierr = PetscFree(ct);CHKERRQ(ierr); 990 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 992 /* 993 Everyone has to call to draw the matrix since the graphics waits are 994 synchronized across all processors that share the PetscDraw object 995 */ 996 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 997 if (!rank) { 998 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 999 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1000 } 1001 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1002 ierr = MatDestroy(A);CHKERRQ(ierr); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatView_MPIAIJ" 1009 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1010 { 1011 PetscErrorCode ierr; 1012 PetscTruth iascii,isdraw,issocket,isbinary; 1013 1014 PetscFunctionBegin; 1015 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1016 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1017 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1018 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1019 if (iascii || isdraw || isbinary || issocket) { 1020 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1021 } else { 1022 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatRelax_MPIAIJ" 1031 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1032 { 1033 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1034 PetscErrorCode ierr; 1035 Vec bb1; 1036 PetscScalar mone=-1.0; 1037 1038 PetscFunctionBegin; 1039 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1040 1041 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1042 1043 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1044 if (flag & SOR_ZERO_INITIAL_GUESS) { 1045 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1046 its--; 1047 } 1048 1049 while (its--) { 1050 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 1053 /* update rhs: bb1 = bb - B*x */ 1054 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1056 1057 /* local sweep */ 1058 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1059 CHKERRQ(ierr); 1060 } 1061 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 while (its--) { 1067 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 1070 /* update rhs: bb1 = bb - B*x */ 1071 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1072 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1073 1074 /* local sweep */ 1075 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1076 CHKERRQ(ierr); 1077 } 1078 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1079 if (flag & SOR_ZERO_INITIAL_GUESS) { 1080 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1081 its--; 1082 } 1083 while (its--) { 1084 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1085 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1086 1087 /* update rhs: bb1 = bb - B*x */ 1088 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1089 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1090 1091 /* local sweep */ 1092 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1093 CHKERRQ(ierr); 1094 } 1095 } else { 1096 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1097 } 1098 1099 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1105 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1106 { 1107 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1108 Mat A = mat->A,B = mat->B; 1109 PetscErrorCode ierr; 1110 PetscReal isend[5],irecv[5]; 1111 1112 PetscFunctionBegin; 1113 info->block_size = 1.0; 1114 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1115 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1116 isend[3] = info->memory; isend[4] = info->mallocs; 1117 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1118 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1119 isend[3] += info->memory; isend[4] += info->mallocs; 1120 if (flag == MAT_LOCAL) { 1121 info->nz_used = isend[0]; 1122 info->nz_allocated = isend[1]; 1123 info->nz_unneeded = isend[2]; 1124 info->memory = isend[3]; 1125 info->mallocs = isend[4]; 1126 } else if (flag == MAT_GLOBAL_MAX) { 1127 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1128 info->nz_used = irecv[0]; 1129 info->nz_allocated = irecv[1]; 1130 info->nz_unneeded = irecv[2]; 1131 info->memory = irecv[3]; 1132 info->mallocs = irecv[4]; 1133 } else if (flag == MAT_GLOBAL_SUM) { 1134 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1135 info->nz_used = irecv[0]; 1136 info->nz_allocated = irecv[1]; 1137 info->nz_unneeded = irecv[2]; 1138 info->memory = irecv[3]; 1139 info->mallocs = irecv[4]; 1140 } 1141 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1142 info->fill_ratio_needed = 0; 1143 info->factor_mallocs = 0; 1144 info->rows_global = (double)matin->M; 1145 info->columns_global = (double)matin->N; 1146 info->rows_local = (double)matin->m; 1147 info->columns_local = (double)matin->N; 1148 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatSetOption_MPIAIJ" 1154 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1155 { 1156 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1157 PetscErrorCode ierr; 1158 1159 PetscFunctionBegin; 1160 switch (op) { 1161 case MAT_NO_NEW_NONZERO_LOCATIONS: 1162 case MAT_YES_NEW_NONZERO_LOCATIONS: 1163 case MAT_COLUMNS_UNSORTED: 1164 case MAT_COLUMNS_SORTED: 1165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1166 case MAT_KEEP_ZEROED_ROWS: 1167 case MAT_NEW_NONZERO_LOCATION_ERR: 1168 case MAT_USE_INODES: 1169 case MAT_DO_NOT_USE_INODES: 1170 case MAT_IGNORE_ZERO_ENTRIES: 1171 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1172 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1173 break; 1174 case MAT_ROW_ORIENTED: 1175 a->roworiented = PETSC_TRUE; 1176 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1177 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1178 break; 1179 case MAT_ROWS_SORTED: 1180 case MAT_ROWS_UNSORTED: 1181 case MAT_YES_NEW_DIAGONALS: 1182 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1183 break; 1184 case MAT_COLUMN_ORIENTED: 1185 a->roworiented = PETSC_FALSE; 1186 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1187 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1188 break; 1189 case MAT_IGNORE_OFF_PROC_ENTRIES: 1190 a->donotstash = PETSC_TRUE; 1191 break; 1192 case MAT_NO_NEW_DIAGONALS: 1193 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1194 case MAT_SYMMETRIC: 1195 case MAT_STRUCTURALLY_SYMMETRIC: 1196 case MAT_HERMITIAN: 1197 case MAT_SYMMETRY_ETERNAL: 1198 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1199 break; 1200 case MAT_NOT_SYMMETRIC: 1201 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1202 case MAT_NOT_HERMITIAN: 1203 case MAT_NOT_SYMMETRY_ETERNAL: 1204 break; 1205 default: 1206 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatGetRow_MPIAIJ" 1213 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1214 { 1215 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1216 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1217 PetscErrorCode ierr; 1218 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1219 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1220 PetscInt *cmap,*idx_p; 1221 1222 PetscFunctionBegin; 1223 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1224 mat->getrowactive = PETSC_TRUE; 1225 1226 if (!mat->rowvalues && (idx || v)) { 1227 /* 1228 allocate enough space to hold information from the longest row. 1229 */ 1230 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1231 PetscInt max = 1,tmp; 1232 for (i=0; i<matin->m; i++) { 1233 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1234 if (max < tmp) { max = tmp; } 1235 } 1236 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1237 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1238 } 1239 1240 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1241 lrow = row - rstart; 1242 1243 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1244 if (!v) {pvA = 0; pvB = 0;} 1245 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1246 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1247 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1248 nztot = nzA + nzB; 1249 1250 cmap = mat->garray; 1251 if (v || idx) { 1252 if (nztot) { 1253 /* Sort by increasing column numbers, assuming A and B already sorted */ 1254 PetscInt imark = -1; 1255 if (v) { 1256 *v = v_p = mat->rowvalues; 1257 for (i=0; i<nzB; i++) { 1258 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1259 else break; 1260 } 1261 imark = i; 1262 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1263 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1264 } 1265 if (idx) { 1266 *idx = idx_p = mat->rowindices; 1267 if (imark > -1) { 1268 for (i=0; i<imark; i++) { 1269 idx_p[i] = cmap[cworkB[i]]; 1270 } 1271 } else { 1272 for (i=0; i<nzB; i++) { 1273 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1274 else break; 1275 } 1276 imark = i; 1277 } 1278 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1279 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1280 } 1281 } else { 1282 if (idx) *idx = 0; 1283 if (v) *v = 0; 1284 } 1285 } 1286 *nz = nztot; 1287 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1288 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1294 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 1298 PetscFunctionBegin; 1299 if (aij->getrowactive == PETSC_FALSE) { 1300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1301 } 1302 aij->getrowactive = PETSC_FALSE; 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatNorm_MPIAIJ" 1308 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1312 PetscErrorCode ierr; 1313 PetscInt i,j,cstart = aij->cstart; 1314 PetscReal sum = 0.0; 1315 PetscScalar *v; 1316 1317 PetscFunctionBegin; 1318 if (aij->size == 1) { 1319 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1320 } else { 1321 if (type == NORM_FROBENIUS) { 1322 v = amat->a; 1323 for (i=0; i<amat->nz; i++) { 1324 #if defined(PETSC_USE_COMPLEX) 1325 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1326 #else 1327 sum += (*v)*(*v); v++; 1328 #endif 1329 } 1330 v = bmat->a; 1331 for (i=0; i<bmat->nz; i++) { 1332 #if defined(PETSC_USE_COMPLEX) 1333 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1334 #else 1335 sum += (*v)*(*v); v++; 1336 #endif 1337 } 1338 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1339 *norm = sqrt(*norm); 1340 } else if (type == NORM_1) { /* max column norm */ 1341 PetscReal *tmp,*tmp2; 1342 PetscInt *jj,*garray = aij->garray; 1343 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1344 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1345 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1346 *norm = 0.0; 1347 v = amat->a; jj = amat->j; 1348 for (j=0; j<amat->nz; j++) { 1349 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1350 } 1351 v = bmat->a; jj = bmat->j; 1352 for (j=0; j<bmat->nz; j++) { 1353 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1354 } 1355 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1356 for (j=0; j<mat->N; j++) { 1357 if (tmp2[j] > *norm) *norm = tmp2[j]; 1358 } 1359 ierr = PetscFree(tmp);CHKERRQ(ierr); 1360 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1361 } else if (type == NORM_INFINITY) { /* max row norm */ 1362 PetscReal ntemp = 0.0; 1363 for (j=0; j<aij->A->m; j++) { 1364 v = amat->a + amat->i[j]; 1365 sum = 0.0; 1366 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1367 sum += PetscAbsScalar(*v); v++; 1368 } 1369 v = bmat->a + bmat->i[j]; 1370 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1371 sum += PetscAbsScalar(*v); v++; 1372 } 1373 if (sum > ntemp) ntemp = sum; 1374 } 1375 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1376 } else { 1377 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1378 } 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatTranspose_MPIAIJ" 1385 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1386 { 1387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1388 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1389 PetscErrorCode ierr; 1390 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1391 Mat B; 1392 PetscScalar *array; 1393 1394 PetscFunctionBegin; 1395 if (!matout && M != N) { 1396 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1397 } 1398 1399 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1400 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1401 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1402 1403 /* copy over the A part */ 1404 Aloc = (Mat_SeqAIJ*)a->A->data; 1405 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1406 row = a->rstart; 1407 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1408 for (i=0; i<m; i++) { 1409 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1410 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1411 } 1412 aj = Aloc->j; 1413 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1414 1415 /* copy over the B part */ 1416 Aloc = (Mat_SeqAIJ*)a->B->data; 1417 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1418 row = a->rstart; 1419 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1420 ct = cols; 1421 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1422 for (i=0; i<m; i++) { 1423 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1424 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1425 } 1426 ierr = PetscFree(ct);CHKERRQ(ierr); 1427 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 if (matout) { 1430 *matout = B; 1431 } else { 1432 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1433 } 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1439 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1440 { 1441 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1442 Mat a = aij->A,b = aij->B; 1443 PetscErrorCode ierr; 1444 PetscInt s1,s2,s3; 1445 1446 PetscFunctionBegin; 1447 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1448 if (rr) { 1449 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1450 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1451 /* Overlap communication with computation. */ 1452 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1453 } 1454 if (ll) { 1455 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1456 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1457 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1458 } 1459 /* scale the diagonal block */ 1460 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1461 1462 if (rr) { 1463 /* Do a scatter end and then right scale the off-diagonal block */ 1464 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1465 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1466 } 1467 1468 PetscFunctionReturn(0); 1469 } 1470 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1474 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1475 { 1476 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 1479 PetscFunctionBegin; 1480 if (!a->rank) { 1481 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1488 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1489 { 1490 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1491 PetscErrorCode ierr; 1492 1493 PetscFunctionBegin; 1494 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1495 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1500 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1501 { 1502 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1503 PetscErrorCode ierr; 1504 1505 PetscFunctionBegin; 1506 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1507 PetscFunctionReturn(0); 1508 } 1509 1510 #undef __FUNCT__ 1511 #define __FUNCT__ "MatEqual_MPIAIJ" 1512 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1513 { 1514 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1515 Mat a,b,c,d; 1516 PetscTruth flg; 1517 PetscErrorCode ierr; 1518 1519 PetscFunctionBegin; 1520 a = matA->A; b = matA->B; 1521 c = matB->A; d = matB->B; 1522 1523 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1524 if (flg == PETSC_TRUE) { 1525 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1526 } 1527 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatCopy_MPIAIJ" 1533 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1534 { 1535 PetscErrorCode ierr; 1536 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1537 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1538 1539 PetscFunctionBegin; 1540 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1541 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1542 /* because of the column compression in the off-processor part of the matrix a->B, 1543 the number of columns in a->B and b->B may be different, hence we cannot call 1544 the MatCopy() directly on the two parts. If need be, we can provide a more 1545 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1546 then copying the submatrices */ 1547 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1550 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1557 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1558 { 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #include "petscblaslapack.h" 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatAXPY_MPIAIJ" 1569 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1570 { 1571 PetscErrorCode ierr; 1572 PetscInt i; 1573 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1574 PetscBLASInt bnz,one=1; 1575 Mat_SeqAIJ *x,*y; 1576 1577 PetscFunctionBegin; 1578 if (str == SAME_NONZERO_PATTERN) { 1579 x = (Mat_SeqAIJ *)xx->A->data; 1580 y = (Mat_SeqAIJ *)yy->A->data; 1581 bnz = (PetscBLASInt)x->nz; 1582 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1583 x = (Mat_SeqAIJ *)xx->B->data; 1584 y = (Mat_SeqAIJ *)yy->B->data; 1585 bnz = (PetscBLASInt)x->nz; 1586 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1587 } else if (str == SUBSET_NONZERO_PATTERN) { 1588 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1589 1590 x = (Mat_SeqAIJ *)xx->B->data; 1591 y = (Mat_SeqAIJ *)yy->B->data; 1592 if (y->xtoy && y->XtoY != xx->B) { 1593 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1594 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1595 } 1596 if (!y->xtoy) { /* get xtoy */ 1597 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1598 y->XtoY = xx->B; 1599 } 1600 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1601 } else { 1602 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1603 } 1604 PetscFunctionReturn(0); 1605 } 1606 1607 /* -------------------------------------------------------------------*/ 1608 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1609 MatGetRow_MPIAIJ, 1610 MatRestoreRow_MPIAIJ, 1611 MatMult_MPIAIJ, 1612 /* 4*/ MatMultAdd_MPIAIJ, 1613 MatMultTranspose_MPIAIJ, 1614 MatMultTransposeAdd_MPIAIJ, 1615 0, 1616 0, 1617 0, 1618 /*10*/ 0, 1619 0, 1620 0, 1621 MatRelax_MPIAIJ, 1622 MatTranspose_MPIAIJ, 1623 /*15*/ MatGetInfo_MPIAIJ, 1624 MatEqual_MPIAIJ, 1625 MatGetDiagonal_MPIAIJ, 1626 MatDiagonalScale_MPIAIJ, 1627 MatNorm_MPIAIJ, 1628 /*20*/ MatAssemblyBegin_MPIAIJ, 1629 MatAssemblyEnd_MPIAIJ, 1630 0, 1631 MatSetOption_MPIAIJ, 1632 MatZeroEntries_MPIAIJ, 1633 /*25*/ MatZeroRows_MPIAIJ, 1634 0, 1635 0, 1636 0, 1637 0, 1638 /*30*/ MatSetUpPreallocation_MPIAIJ, 1639 0, 1640 0, 1641 0, 1642 0, 1643 /*35*/ MatDuplicate_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 0, 1648 /*40*/ MatAXPY_MPIAIJ, 1649 MatGetSubMatrices_MPIAIJ, 1650 MatIncreaseOverlap_MPIAIJ, 1651 MatGetValues_MPIAIJ, 1652 MatCopy_MPIAIJ, 1653 /*45*/ MatPrintHelp_MPIAIJ, 1654 MatScale_MPIAIJ, 1655 0, 1656 0, 1657 0, 1658 /*50*/ MatSetBlockSize_MPIAIJ, 1659 0, 1660 0, 1661 0, 1662 0, 1663 /*55*/ MatFDColoringCreate_MPIAIJ, 1664 0, 1665 MatSetUnfactored_MPIAIJ, 1666 0, 1667 0, 1668 /*60*/ MatGetSubMatrix_MPIAIJ, 1669 MatDestroy_MPIAIJ, 1670 MatView_MPIAIJ, 1671 MatGetPetscMaps_Petsc, 1672 0, 1673 /*65*/ 0, 1674 0, 1675 0, 1676 0, 1677 0, 1678 /*70*/ 0, 1679 0, 1680 MatSetColoring_MPIAIJ, 1681 MatSetValuesAdic_MPIAIJ, 1682 MatSetValuesAdifor_MPIAIJ, 1683 /*75*/ 0, 1684 0, 1685 0, 1686 0, 1687 0, 1688 /*80*/ 0, 1689 0, 1690 0, 1691 0, 1692 /*84*/ MatLoad_MPIAIJ, 1693 0, 1694 0, 1695 0, 1696 0, 1697 0, 1698 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1699 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1700 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1701 MatPtAP_MPIAIJ_MPIAIJ, 1702 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1703 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1704 0, 1705 0, 1706 0}; 1707 1708 /* ----------------------------------------------------------------------------------------*/ 1709 1710 EXTERN_C_BEGIN 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1713 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1714 { 1715 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1720 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 EXTERN_C_BEGIN 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1728 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1729 { 1730 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1735 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 EXTERN_C_END 1739 1740 #include "petscpc.h" 1741 EXTERN_C_BEGIN 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1744 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1745 { 1746 Mat_MPIAIJ *b; 1747 PetscErrorCode ierr; 1748 PetscInt i; 1749 1750 PetscFunctionBegin; 1751 B->preallocated = PETSC_TRUE; 1752 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1753 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1754 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1755 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1756 if (d_nnz) { 1757 for (i=0; i<B->m; i++) { 1758 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1759 } 1760 } 1761 if (o_nnz) { 1762 for (i=0; i<B->m; i++) { 1763 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1764 } 1765 } 1766 b = (Mat_MPIAIJ*)B->data; 1767 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1768 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1769 1770 PetscFunctionReturn(0); 1771 } 1772 EXTERN_C_END 1773 1774 /*MC 1775 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1776 1777 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1778 and MATMPIAIJ otherwise. As a result, for single process communicators, 1779 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1780 for communicators controlling multiple processes. It is recommended that you call both of 1781 the above preallocation routines for simplicity. 1782 1783 Options Database Keys: 1784 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1785 1786 Level: beginner 1787 1788 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1789 M*/ 1790 1791 EXTERN_C_BEGIN 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatCreate_AIJ" 1794 PetscErrorCode MatCreate_AIJ(Mat A) 1795 { 1796 PetscErrorCode ierr; 1797 PetscMPIInt size; 1798 1799 PetscFunctionBegin; 1800 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1801 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1802 if (size == 1) { 1803 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 EXTERN_C_END 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1813 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1814 { 1815 Mat mat; 1816 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 *newmat = 0; 1821 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1822 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1823 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1824 a = (Mat_MPIAIJ*)mat->data; 1825 1826 mat->factor = matin->factor; 1827 mat->bs = matin->bs; 1828 mat->assembled = PETSC_TRUE; 1829 mat->insertmode = NOT_SET_VALUES; 1830 mat->preallocated = PETSC_TRUE; 1831 1832 a->rstart = oldmat->rstart; 1833 a->rend = oldmat->rend; 1834 a->cstart = oldmat->cstart; 1835 a->cend = oldmat->cend; 1836 a->size = oldmat->size; 1837 a->rank = oldmat->rank; 1838 a->donotstash = oldmat->donotstash; 1839 a->roworiented = oldmat->roworiented; 1840 a->rowindices = 0; 1841 a->rowvalues = 0; 1842 a->getrowactive = PETSC_FALSE; 1843 1844 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1845 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1846 if (oldmat->colmap) { 1847 #if defined (PETSC_USE_CTABLE) 1848 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1849 #else 1850 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1851 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1852 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1853 #endif 1854 } else a->colmap = 0; 1855 if (oldmat->garray) { 1856 PetscInt len; 1857 len = oldmat->B->n; 1858 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1859 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1860 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1861 } else a->garray = 0; 1862 1863 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1864 PetscLogObjectParent(mat,a->lvec); 1865 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1866 PetscLogObjectParent(mat,a->Mvctx); 1867 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1868 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1869 PetscLogObjectParent(mat,a->A); 1870 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1871 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1872 PetscLogObjectParent(mat,a->B); 1873 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1874 *newmat = mat; 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #include "petscsys.h" 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatLoad_MPIAIJ" 1882 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1883 { 1884 Mat A; 1885 PetscScalar *vals,*svals; 1886 MPI_Comm comm = ((PetscObject)viewer)->comm; 1887 MPI_Status status; 1888 PetscErrorCode ierr; 1889 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1890 PetscInt i,nz,j,rstart,rend; 1891 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1892 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1893 PetscInt cend,cstart,n,*rowners; 1894 int fd; 1895 1896 PetscFunctionBegin; 1897 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1898 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1899 if (!rank) { 1900 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1901 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1902 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1903 } 1904 1905 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1906 M = header[1]; N = header[2]; 1907 /* determine ownership of all rows */ 1908 m = M/size + ((M % size) > rank); 1909 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1910 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1911 rowners[0] = 0; 1912 for (i=2; i<=size; i++) { 1913 rowners[i] += rowners[i-1]; 1914 } 1915 rstart = rowners[rank]; 1916 rend = rowners[rank+1]; 1917 1918 /* distribute row lengths to all processors */ 1919 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1920 if (!rank) { 1921 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1922 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1923 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1924 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1925 for (j=0; j<m; j++) { 1926 procsnz[0] += ourlens[j]; 1927 } 1928 for (i=1; i<size; i++) { 1929 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1930 /* calculate the number of nonzeros on each processor */ 1931 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1932 procsnz[i] += rowlengths[j]; 1933 } 1934 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1935 } 1936 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1937 } else { 1938 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1939 } 1940 1941 if (!rank) { 1942 /* determine max buffer needed and allocate it */ 1943 maxnz = 0; 1944 for (i=0; i<size; i++) { 1945 maxnz = PetscMax(maxnz,procsnz[i]); 1946 } 1947 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1948 1949 /* read in my part of the matrix column indices */ 1950 nz = procsnz[0]; 1951 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1952 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1953 1954 /* read in every one elses and ship off */ 1955 for (i=1; i<size; i++) { 1956 nz = procsnz[i]; 1957 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1958 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1959 } 1960 ierr = PetscFree(cols);CHKERRQ(ierr); 1961 } else { 1962 /* determine buffer space needed for message */ 1963 nz = 0; 1964 for (i=0; i<m; i++) { 1965 nz += ourlens[i]; 1966 } 1967 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1968 1969 /* receive message of column indices*/ 1970 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1971 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1972 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1973 } 1974 1975 /* determine column ownership if matrix is not square */ 1976 if (N != M) { 1977 n = N/size + ((N % size) > rank); 1978 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1979 cstart = cend - n; 1980 } else { 1981 cstart = rstart; 1982 cend = rend; 1983 n = cend - cstart; 1984 } 1985 1986 /* loop over local rows, determining number of off diagonal entries */ 1987 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1988 jj = 0; 1989 for (i=0; i<m; i++) { 1990 for (j=0; j<ourlens[i]; j++) { 1991 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1992 jj++; 1993 } 1994 } 1995 1996 /* create our matrix */ 1997 for (i=0; i<m; i++) { 1998 ourlens[i] -= offlens[i]; 1999 } 2000 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2001 ierr = MatSetType(A,type);CHKERRQ(ierr); 2002 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2003 2004 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2005 for (i=0; i<m; i++) { 2006 ourlens[i] += offlens[i]; 2007 } 2008 2009 if (!rank) { 2010 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2011 2012 /* read in my part of the matrix numerical values */ 2013 nz = procsnz[0]; 2014 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2015 2016 /* insert into matrix */ 2017 jj = rstart; 2018 smycols = mycols; 2019 svals = vals; 2020 for (i=0; i<m; i++) { 2021 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2022 smycols += ourlens[i]; 2023 svals += ourlens[i]; 2024 jj++; 2025 } 2026 2027 /* read in other processors and ship out */ 2028 for (i=1; i<size; i++) { 2029 nz = procsnz[i]; 2030 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2031 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2032 } 2033 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2034 } else { 2035 /* receive numeric values */ 2036 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2037 2038 /* receive message of values*/ 2039 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2040 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2041 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2042 2043 /* insert into matrix */ 2044 jj = rstart; 2045 smycols = mycols; 2046 svals = vals; 2047 for (i=0; i<m; i++) { 2048 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2049 smycols += ourlens[i]; 2050 svals += ourlens[i]; 2051 jj++; 2052 } 2053 } 2054 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2055 ierr = PetscFree(vals);CHKERRQ(ierr); 2056 ierr = PetscFree(mycols);CHKERRQ(ierr); 2057 ierr = PetscFree(rowners);CHKERRQ(ierr); 2058 2059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2061 *newmat = A; 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2067 /* 2068 Not great since it makes two copies of the submatrix, first an SeqAIJ 2069 in local and then by concatenating the local matrices the end result. 2070 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2071 */ 2072 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2073 { 2074 PetscErrorCode ierr; 2075 PetscMPIInt rank,size; 2076 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2077 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2078 Mat *local,M,Mreuse; 2079 PetscScalar *vwork,*aa; 2080 MPI_Comm comm = mat->comm; 2081 Mat_SeqAIJ *aij; 2082 2083 2084 PetscFunctionBegin; 2085 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2086 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2087 2088 if (call == MAT_REUSE_MATRIX) { 2089 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2090 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2091 local = &Mreuse; 2092 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2093 } else { 2094 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2095 Mreuse = *local; 2096 ierr = PetscFree(local);CHKERRQ(ierr); 2097 } 2098 2099 /* 2100 m - number of local rows 2101 n - number of columns (same on all processors) 2102 rstart - first row in new global matrix generated 2103 */ 2104 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2105 if (call == MAT_INITIAL_MATRIX) { 2106 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2107 ii = aij->i; 2108 jj = aij->j; 2109 2110 /* 2111 Determine the number of non-zeros in the diagonal and off-diagonal 2112 portions of the matrix in order to do correct preallocation 2113 */ 2114 2115 /* first get start and end of "diagonal" columns */ 2116 if (csize == PETSC_DECIDE) { 2117 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2118 if (mglobal == n) { /* square matrix */ 2119 nlocal = m; 2120 } else { 2121 nlocal = n/size + ((n % size) > rank); 2122 } 2123 } else { 2124 nlocal = csize; 2125 } 2126 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2127 rstart = rend - nlocal; 2128 if (rank == size - 1 && rend != n) { 2129 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2130 } 2131 2132 /* next, compute all the lengths */ 2133 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2134 olens = dlens + m; 2135 for (i=0; i<m; i++) { 2136 jend = ii[i+1] - ii[i]; 2137 olen = 0; 2138 dlen = 0; 2139 for (j=0; j<jend; j++) { 2140 if (*jj < rstart || *jj >= rend) olen++; 2141 else dlen++; 2142 jj++; 2143 } 2144 olens[i] = olen; 2145 dlens[i] = dlen; 2146 } 2147 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2148 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2149 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2150 ierr = PetscFree(dlens);CHKERRQ(ierr); 2151 } else { 2152 PetscInt ml,nl; 2153 2154 M = *newmat; 2155 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2156 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2157 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2158 /* 2159 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2160 rather than the slower MatSetValues(). 2161 */ 2162 M->was_assembled = PETSC_TRUE; 2163 M->assembled = PETSC_FALSE; 2164 } 2165 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2166 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2167 ii = aij->i; 2168 jj = aij->j; 2169 aa = aij->a; 2170 for (i=0; i<m; i++) { 2171 row = rstart + i; 2172 nz = ii[i+1] - ii[i]; 2173 cwork = jj; jj += nz; 2174 vwork = aa; aa += nz; 2175 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2176 } 2177 2178 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2179 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2180 *newmat = M; 2181 2182 /* save submatrix used in processor for next request */ 2183 if (call == MAT_INITIAL_MATRIX) { 2184 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2185 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2186 } 2187 2188 PetscFunctionReturn(0); 2189 } 2190 2191 EXTERN_C_BEGIN 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2194 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2195 { 2196 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2197 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2198 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2199 const PetscInt *JJ; 2200 PetscScalar *values; 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 #if defined(PETSC_OPT_g) 2205 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2206 #endif 2207 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2208 o_nnz = d_nnz + m; 2209 2210 for (i=0; i<m; i++) { 2211 nnz = I[i+1]- I[i]; 2212 JJ = J + I[i]; 2213 nnz_max = PetscMax(nnz_max,nnz); 2214 #if defined(PETSC_OPT_g) 2215 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2216 #endif 2217 for (j=0; j<nnz; j++) { 2218 if (*JJ >= cstart) break; 2219 JJ++; 2220 } 2221 d = 0; 2222 for (; j<nnz; j++) { 2223 if (*JJ++ >= cend) break; 2224 d++; 2225 } 2226 d_nnz[i] = d; 2227 o_nnz[i] = nnz - d; 2228 } 2229 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2230 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2231 2232 if (v) values = (PetscScalar*)v; 2233 else { 2234 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2235 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2236 } 2237 2238 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2239 for (i=0; i<m; i++) { 2240 ii = i + rstart; 2241 nnz = I[i+1]- I[i]; 2242 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2243 } 2244 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2245 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2246 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2247 2248 if (!v) { 2249 ierr = PetscFree(values);CHKERRQ(ierr); 2250 } 2251 PetscFunctionReturn(0); 2252 } 2253 EXTERN_C_END 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2257 /*@C 2258 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2259 (the default parallel PETSc format). 2260 2261 Collective on MPI_Comm 2262 2263 Input Parameters: 2264 + A - the matrix 2265 . i - the indices into j for the start of each local row (starts with zero) 2266 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2267 - v - optional values in the matrix 2268 2269 Level: developer 2270 2271 .keywords: matrix, aij, compressed row, sparse, parallel 2272 2273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2274 @*/ 2275 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2276 { 2277 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2278 2279 PetscFunctionBegin; 2280 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2281 if (f) { 2282 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2283 } 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2289 /*@C 2290 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2291 (the default parallel PETSc format). For good matrix assembly performance 2292 the user should preallocate the matrix storage by setting the parameters 2293 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2294 performance can be increased by more than a factor of 50. 2295 2296 Collective on MPI_Comm 2297 2298 Input Parameters: 2299 + A - the matrix 2300 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2301 (same value is used for all local rows) 2302 . d_nnz - array containing the number of nonzeros in the various rows of the 2303 DIAGONAL portion of the local submatrix (possibly different for each row) 2304 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2305 The size of this array is equal to the number of local rows, i.e 'm'. 2306 You must leave room for the diagonal entry even if it is zero. 2307 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2308 submatrix (same value is used for all local rows). 2309 - o_nnz - array containing the number of nonzeros in the various rows of the 2310 OFF-DIAGONAL portion of the local submatrix (possibly different for 2311 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2312 structure. The size of this array is equal to the number 2313 of local rows, i.e 'm'. 2314 2315 The AIJ format (also called the Yale sparse matrix format or 2316 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2317 storage. The stored row and column indices begin with zero. See the users manual for details. 2318 2319 The parallel matrix is partitioned such that the first m0 rows belong to 2320 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2321 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2322 2323 The DIAGONAL portion of the local submatrix of a processor can be defined 2324 as the submatrix which is obtained by extraction the part corresponding 2325 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2326 first row that belongs to the processor, and r2 is the last row belonging 2327 to the this processor. This is a square mxm matrix. The remaining portion 2328 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2329 2330 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2331 2332 Example usage: 2333 2334 Consider the following 8x8 matrix with 34 non-zero values, that is 2335 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2336 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2337 as follows: 2338 2339 .vb 2340 1 2 0 | 0 3 0 | 0 4 2341 Proc0 0 5 6 | 7 0 0 | 8 0 2342 9 0 10 | 11 0 0 | 12 0 2343 ------------------------------------- 2344 13 0 14 | 15 16 17 | 0 0 2345 Proc1 0 18 0 | 19 20 21 | 0 0 2346 0 0 0 | 22 23 0 | 24 0 2347 ------------------------------------- 2348 Proc2 25 26 27 | 0 0 28 | 29 0 2349 30 0 0 | 31 32 33 | 0 34 2350 .ve 2351 2352 This can be represented as a collection of submatrices as: 2353 2354 .vb 2355 A B C 2356 D E F 2357 G H I 2358 .ve 2359 2360 Where the submatrices A,B,C are owned by proc0, D,E,F are 2361 owned by proc1, G,H,I are owned by proc2. 2362 2363 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2364 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2365 The 'M','N' parameters are 8,8, and have the same values on all procs. 2366 2367 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2368 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2369 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2370 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2371 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2372 matrix, ans [DF] as another SeqAIJ matrix. 2373 2374 When d_nz, o_nz parameters are specified, d_nz storage elements are 2375 allocated for every row of the local diagonal submatrix, and o_nz 2376 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2377 One way to choose d_nz and o_nz is to use the max nonzerors per local 2378 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2379 In this case, the values of d_nz,o_nz are: 2380 .vb 2381 proc0 : dnz = 2, o_nz = 2 2382 proc1 : dnz = 3, o_nz = 2 2383 proc2 : dnz = 1, o_nz = 4 2384 .ve 2385 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2386 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2387 for proc3. i.e we are using 12+15+10=37 storage locations to store 2388 34 values. 2389 2390 When d_nnz, o_nnz parameters are specified, the storage is specified 2391 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2392 In the above case the values for d_nnz,o_nnz are: 2393 .vb 2394 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2395 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2396 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2397 .ve 2398 Here the space allocated is sum of all the above values i.e 34, and 2399 hence pre-allocation is perfect. 2400 2401 Level: intermediate 2402 2403 .keywords: matrix, aij, compressed row, sparse, parallel 2404 2405 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2406 MPIAIJ 2407 @*/ 2408 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2409 { 2410 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2411 2412 PetscFunctionBegin; 2413 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2414 if (f) { 2415 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2416 } 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatCreateMPIAIJ" 2422 /*@C 2423 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2424 (the default parallel PETSc format). For good matrix assembly performance 2425 the user should preallocate the matrix storage by setting the parameters 2426 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2427 performance can be increased by more than a factor of 50. 2428 2429 Collective on MPI_Comm 2430 2431 Input Parameters: 2432 + comm - MPI communicator 2433 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2434 This value should be the same as the local size used in creating the 2435 y vector for the matrix-vector product y = Ax. 2436 . n - This value should be the same as the local size used in creating the 2437 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2438 calculated if N is given) For square matrices n is almost always m. 2439 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2440 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2441 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2442 (same value is used for all local rows) 2443 . d_nnz - array containing the number of nonzeros in the various rows of the 2444 DIAGONAL portion of the local submatrix (possibly different for each row) 2445 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2446 The size of this array is equal to the number of local rows, i.e 'm'. 2447 You must leave room for the diagonal entry even if it is zero. 2448 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2449 submatrix (same value is used for all local rows). 2450 - o_nnz - array containing the number of nonzeros in the various rows of the 2451 OFF-DIAGONAL portion of the local submatrix (possibly different for 2452 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2453 structure. The size of this array is equal to the number 2454 of local rows, i.e 'm'. 2455 2456 Output Parameter: 2457 . A - the matrix 2458 2459 Notes: 2460 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2461 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2462 storage requirements for this matrix. 2463 2464 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2465 processor than it must be used on all processors that share the object for 2466 that argument. 2467 2468 The user MUST specify either the local or global matrix dimensions 2469 (possibly both). 2470 2471 The parallel matrix is partitioned such that the first m0 rows belong to 2472 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2473 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2474 2475 The DIAGONAL portion of the local submatrix of a processor can be defined 2476 as the submatrix which is obtained by extraction the part corresponding 2477 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2478 first row that belongs to the processor, and r2 is the last row belonging 2479 to the this processor. This is a square mxm matrix. The remaining portion 2480 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2481 2482 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2483 2484 When calling this routine with a single process communicator, a matrix of 2485 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2486 type of communicator, use the construction mechanism: 2487 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2488 2489 By default, this format uses inodes (identical nodes) when possible. 2490 We search for consecutive rows with the same nonzero structure, thereby 2491 reusing matrix information to achieve increased efficiency. 2492 2493 Options Database Keys: 2494 + -mat_aij_no_inode - Do not use inodes 2495 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2496 - -mat_aij_oneindex - Internally use indexing starting at 1 2497 rather than 0. Note that when calling MatSetValues(), 2498 the user still MUST index entries starting at 0! 2499 2500 2501 Example usage: 2502 2503 Consider the following 8x8 matrix with 34 non-zero values, that is 2504 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2505 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2506 as follows: 2507 2508 .vb 2509 1 2 0 | 0 3 0 | 0 4 2510 Proc0 0 5 6 | 7 0 0 | 8 0 2511 9 0 10 | 11 0 0 | 12 0 2512 ------------------------------------- 2513 13 0 14 | 15 16 17 | 0 0 2514 Proc1 0 18 0 | 19 20 21 | 0 0 2515 0 0 0 | 22 23 0 | 24 0 2516 ------------------------------------- 2517 Proc2 25 26 27 | 0 0 28 | 29 0 2518 30 0 0 | 31 32 33 | 0 34 2519 .ve 2520 2521 This can be represented as a collection of submatrices as: 2522 2523 .vb 2524 A B C 2525 D E F 2526 G H I 2527 .ve 2528 2529 Where the submatrices A,B,C are owned by proc0, D,E,F are 2530 owned by proc1, G,H,I are owned by proc2. 2531 2532 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2533 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2534 The 'M','N' parameters are 8,8, and have the same values on all procs. 2535 2536 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2537 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2538 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2539 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2540 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2541 matrix, ans [DF] as another SeqAIJ matrix. 2542 2543 When d_nz, o_nz parameters are specified, d_nz storage elements are 2544 allocated for every row of the local diagonal submatrix, and o_nz 2545 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2546 One way to choose d_nz and o_nz is to use the max nonzerors per local 2547 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2548 In this case, the values of d_nz,o_nz are: 2549 .vb 2550 proc0 : dnz = 2, o_nz = 2 2551 proc1 : dnz = 3, o_nz = 2 2552 proc2 : dnz = 1, o_nz = 4 2553 .ve 2554 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2555 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2556 for proc3. i.e we are using 12+15+10=37 storage locations to store 2557 34 values. 2558 2559 When d_nnz, o_nnz parameters are specified, the storage is specified 2560 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2561 In the above case the values for d_nnz,o_nnz are: 2562 .vb 2563 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2564 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2565 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2566 .ve 2567 Here the space allocated is sum of all the above values i.e 34, and 2568 hence pre-allocation is perfect. 2569 2570 Level: intermediate 2571 2572 .keywords: matrix, aij, compressed row, sparse, parallel 2573 2574 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2575 MPIAIJ 2576 @*/ 2577 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2578 { 2579 PetscErrorCode ierr; 2580 PetscMPIInt size; 2581 2582 PetscFunctionBegin; 2583 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2584 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2585 if (size > 1) { 2586 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2587 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2588 } else { 2589 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2590 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2591 } 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2597 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2598 { 2599 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2600 2601 PetscFunctionBegin; 2602 *Ad = a->A; 2603 *Ao = a->B; 2604 *colmap = a->garray; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2610 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2611 { 2612 PetscErrorCode ierr; 2613 PetscInt i; 2614 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2615 2616 PetscFunctionBegin; 2617 if (coloring->ctype == IS_COLORING_LOCAL) { 2618 ISColoringValue *allcolors,*colors; 2619 ISColoring ocoloring; 2620 2621 /* set coloring for diagonal portion */ 2622 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2623 2624 /* set coloring for off-diagonal portion */ 2625 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2626 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2627 for (i=0; i<a->B->n; i++) { 2628 colors[i] = allcolors[a->garray[i]]; 2629 } 2630 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2631 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2632 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2633 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2634 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2635 ISColoringValue *colors; 2636 PetscInt *larray; 2637 ISColoring ocoloring; 2638 2639 /* set coloring for diagonal portion */ 2640 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2641 for (i=0; i<a->A->n; i++) { 2642 larray[i] = i + a->cstart; 2643 } 2644 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2645 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2646 for (i=0; i<a->A->n; i++) { 2647 colors[i] = coloring->colors[larray[i]]; 2648 } 2649 ierr = PetscFree(larray);CHKERRQ(ierr); 2650 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2651 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2652 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2653 2654 /* set coloring for off-diagonal portion */ 2655 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2656 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2657 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2658 for (i=0; i<a->B->n; i++) { 2659 colors[i] = coloring->colors[larray[i]]; 2660 } 2661 ierr = PetscFree(larray);CHKERRQ(ierr); 2662 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2663 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2664 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2665 } else { 2666 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2667 } 2668 2669 PetscFunctionReturn(0); 2670 } 2671 2672 #undef __FUNCT__ 2673 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2674 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2675 { 2676 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2677 PetscErrorCode ierr; 2678 2679 PetscFunctionBegin; 2680 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2681 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2682 PetscFunctionReturn(0); 2683 } 2684 2685 #undef __FUNCT__ 2686 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2687 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2688 { 2689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2690 PetscErrorCode ierr; 2691 2692 PetscFunctionBegin; 2693 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2694 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2695 PetscFunctionReturn(0); 2696 } 2697 2698 #undef __FUNCT__ 2699 #define __FUNCT__ "MatMerge" 2700 /*@C 2701 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2702 matrices from each processor 2703 2704 Collective on MPI_Comm 2705 2706 Input Parameters: 2707 + comm - the communicators the parallel matrix will live on 2708 . inmat - the input sequential matrices 2709 . n - number of local columns (or PETSC_DECIDE) 2710 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2711 2712 Output Parameter: 2713 . outmat - the parallel matrix generated 2714 2715 Level: advanced 2716 2717 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2718 2719 @*/ 2720 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2721 { 2722 PetscErrorCode ierr; 2723 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2724 const PetscInt *indx; 2725 const PetscScalar *values; 2726 PetscMap columnmap,rowmap; 2727 2728 PetscFunctionBegin; 2729 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2730 /* 2731 PetscMPIInt rank; 2732 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2733 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2734 */ 2735 if (scall == MAT_INITIAL_MATRIX){ 2736 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2737 if (n == PETSC_DECIDE){ 2738 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2739 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2740 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2741 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2742 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2743 } 2744 2745 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2746 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2747 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2748 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2749 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2750 2751 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2752 for (i=0;i<m;i++) { 2753 ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2754 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2755 ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2756 } 2757 /* This routine will ONLY return MPIAIJ type matrix */ 2758 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2759 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2760 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2761 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2762 2763 } else if (scall == MAT_REUSE_MATRIX){ 2764 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2765 } else { 2766 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2767 } 2768 2769 for (i=0;i<m;i++) { 2770 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2771 I = i + rstart; 2772 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2773 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2774 } 2775 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2776 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2777 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2778 2779 PetscFunctionReturn(0); 2780 } 2781 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatFileSplit" 2784 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2785 { 2786 PetscErrorCode ierr; 2787 PetscMPIInt rank; 2788 PetscInt m,N,i,rstart,nnz; 2789 size_t len; 2790 const PetscInt *indx; 2791 PetscViewer out; 2792 char *name; 2793 Mat B; 2794 const PetscScalar *values; 2795 2796 PetscFunctionBegin; 2797 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2798 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2799 /* Should this be the type of the diagonal block of A? */ 2800 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2801 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2802 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2803 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2804 for (i=0;i<m;i++) { 2805 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2806 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2807 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2808 } 2809 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 2812 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2813 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2814 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2815 sprintf(name,"%s.%d",outfile,rank); 2816 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2817 ierr = PetscFree(name); 2818 ierr = MatView(B,out);CHKERRQ(ierr); 2819 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2820 ierr = MatDestroy(B);CHKERRQ(ierr); 2821 PetscFunctionReturn(0); 2822 } 2823 2824 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2827 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2828 { 2829 PetscErrorCode ierr; 2830 Mat_Merge_SeqsToMPI *merge; 2831 PetscObjectContainer container; 2832 2833 PetscFunctionBegin; 2834 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2835 if (container) { 2836 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2837 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2838 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2839 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2840 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2841 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2842 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2843 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2844 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2845 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2846 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2847 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2848 2849 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2850 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2851 } 2852 ierr = PetscFree(merge);CHKERRQ(ierr); 2853 2854 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 #include "src/mat/utils/freespace.h" 2859 #include "petscbt.h" 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatMerge_SeqsToMPI" 2862 /*@C 2863 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2864 matrices from each processor 2865 2866 Collective on MPI_Comm 2867 2868 Input Parameters: 2869 + comm - the communicators the parallel matrix will live on 2870 . seqmat - the input sequential matrices 2871 . m - number of local rows (or PETSC_DECIDE) 2872 . n - number of local columns (or PETSC_DECIDE) 2873 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2874 2875 Output Parameter: 2876 . mpimat - the parallel matrix generated 2877 2878 Level: advanced 2879 2880 Notes: 2881 The dimensions of the sequential matrix in each processor MUST be the same. 2882 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2883 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2884 @*/ 2885 static PetscEvent logkey_seqstompinum = 0; 2886 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2887 { 2888 PetscErrorCode ierr; 2889 MPI_Comm comm=mpimat->comm; 2890 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2891 PetscMPIInt size,rank,taga,*len_s; 2892 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2893 PetscInt proc,m; 2894 PetscInt **buf_ri,**buf_rj; 2895 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2896 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2897 MPI_Request *s_waits,*r_waits; 2898 MPI_Status *status; 2899 MatScalar *aa=a->a,**abuf_r,*ba_i; 2900 Mat_Merge_SeqsToMPI *merge; 2901 PetscObjectContainer container; 2902 2903 PetscFunctionBegin; 2904 if (!logkey_seqstompinum) { 2905 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2906 } 2907 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2908 2909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2910 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2911 2912 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2913 if (container) { 2914 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2915 } 2916 bi = merge->bi; 2917 bj = merge->bj; 2918 buf_ri = merge->buf_ri; 2919 buf_rj = merge->buf_rj; 2920 2921 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2922 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2923 len_s = merge->len_s; 2924 2925 /* send and recv matrix values */ 2926 /*-----------------------------*/ 2927 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2928 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2929 2930 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2931 for (proc=0,k=0; proc<size; proc++){ 2932 if (!len_s[proc]) continue; 2933 i = owners[proc]; 2934 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2935 k++; 2936 } 2937 2938 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2939 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2940 ierr = PetscFree(status);CHKERRQ(ierr); 2941 2942 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2943 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2944 2945 /* insert mat values of mpimat */ 2946 /*----------------------------*/ 2947 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2948 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2949 nextrow = buf_ri_k + merge->nrecv; 2950 nextai = nextrow + merge->nrecv; 2951 2952 for (k=0; k<merge->nrecv; k++){ 2953 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2954 nrows = *(buf_ri_k[k]); 2955 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2956 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2957 } 2958 2959 /* set values of ba */ 2960 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2961 for (i=0; i<m; i++) { 2962 arow = owners[rank] + i; 2963 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2964 bnzi = bi[i+1] - bi[i]; 2965 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2966 2967 /* add local non-zero vals of this proc's seqmat into ba */ 2968 anzi = ai[arow+1] - ai[arow]; 2969 aj = a->j + ai[arow]; 2970 aa = a->a + ai[arow]; 2971 nextaj = 0; 2972 for (j=0; nextaj<anzi; j++){ 2973 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2974 ba_i[j] += aa[nextaj++]; 2975 } 2976 } 2977 2978 /* add received vals into ba */ 2979 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2980 /* i-th row */ 2981 if (i == *nextrow[k]) { 2982 anzi = *(nextai[k]+1) - *nextai[k]; 2983 aj = buf_rj[k] + *(nextai[k]); 2984 aa = abuf_r[k] + *(nextai[k]); 2985 nextaj = 0; 2986 for (j=0; nextaj<anzi; j++){ 2987 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2988 ba_i[j] += aa[nextaj++]; 2989 } 2990 } 2991 nextrow[k]++; nextai[k]++; 2992 } 2993 } 2994 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2995 } 2996 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 2999 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3000 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3001 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3002 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3003 PetscFunctionReturn(0); 3004 } 3005 static PetscEvent logkey_seqstompisym = 0; 3006 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3007 { 3008 PetscErrorCode ierr; 3009 Mat B_mpi; 3010 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3011 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3012 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3013 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3014 PetscInt len,proc,*dnz,*onz; 3015 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3016 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3017 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3018 MPI_Status *status; 3019 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3020 PetscBT lnkbt; 3021 Mat_Merge_SeqsToMPI *merge; 3022 PetscObjectContainer container; 3023 3024 PetscFunctionBegin; 3025 if (!logkey_seqstompisym) { 3026 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3027 } 3028 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3029 3030 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3031 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3032 3033 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3034 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3035 3036 /* determine row ownership */ 3037 /*---------------------------------------------------------*/ 3038 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3039 if (m == PETSC_DECIDE) { 3040 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3041 } else { 3042 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3043 } 3044 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3045 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3046 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3047 3048 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3049 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3050 3051 /* determine the number of messages to send, their lengths */ 3052 /*---------------------------------------------------------*/ 3053 len_s = merge->len_s; 3054 3055 len = 0; /* length of buf_si[] */ 3056 merge->nsend = 0; 3057 for (proc=0; proc<size; proc++){ 3058 len_si[proc] = 0; 3059 if (proc == rank){ 3060 len_s[proc] = 0; 3061 } else { 3062 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3063 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3064 } 3065 if (len_s[proc]) { 3066 merge->nsend++; 3067 nrows = 0; 3068 for (i=owners[proc]; i<owners[proc+1]; i++){ 3069 if (ai[i+1] > ai[i]) nrows++; 3070 } 3071 len_si[proc] = 2*(nrows+1); 3072 len += len_si[proc]; 3073 } 3074 } 3075 3076 /* determine the number and length of messages to receive for ij-structure */ 3077 /*-------------------------------------------------------------------------*/ 3078 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3079 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3080 3081 /* post the Irecv of j-structure */ 3082 /*-------------------------------*/ 3083 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3084 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3085 3086 /* post the Isend of j-structure */ 3087 /*--------------------------------*/ 3088 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3089 sj_waits = si_waits + merge->nsend; 3090 3091 for (proc=0, k=0; proc<size; proc++){ 3092 if (!len_s[proc]) continue; 3093 i = owners[proc]; 3094 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3095 k++; 3096 } 3097 3098 /* receives and sends of j-structure are complete */ 3099 /*------------------------------------------------*/ 3100 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3101 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3102 3103 /* send and recv i-structure */ 3104 /*---------------------------*/ 3105 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3106 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3107 3108 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3109 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3110 for (proc=0,k=0; proc<size; proc++){ 3111 if (!len_s[proc]) continue; 3112 /* form outgoing message for i-structure: 3113 buf_si[0]: nrows to be sent 3114 [1:nrows]: row index (global) 3115 [nrows+1:2*nrows+1]: i-structure index 3116 */ 3117 /*-------------------------------------------*/ 3118 nrows = len_si[proc]/2 - 1; 3119 buf_si_i = buf_si + nrows+1; 3120 buf_si[0] = nrows; 3121 buf_si_i[0] = 0; 3122 nrows = 0; 3123 for (i=owners[proc]; i<owners[proc+1]; i++){ 3124 anzi = ai[i+1] - ai[i]; 3125 if (anzi) { 3126 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3127 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3128 nrows++; 3129 } 3130 } 3131 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3132 k++; 3133 buf_si += len_si[proc]; 3134 } 3135 3136 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3137 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3138 3139 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3140 for (i=0; i<merge->nrecv; i++){ 3141 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3142 } 3143 3144 ierr = PetscFree(len_si);CHKERRQ(ierr); 3145 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3146 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3147 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3148 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3149 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3150 ierr = PetscFree(status);CHKERRQ(ierr); 3151 3152 /* compute a local seq matrix in each processor */ 3153 /*----------------------------------------------*/ 3154 /* allocate bi array and free space for accumulating nonzero column info */ 3155 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3156 bi[0] = 0; 3157 3158 /* create and initialize a linked list */ 3159 nlnk = N+1; 3160 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3161 3162 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3163 len = 0; 3164 len = ai[owners[rank+1]] - ai[owners[rank]]; 3165 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3166 current_space = free_space; 3167 3168 /* determine symbolic info for each local row */ 3169 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3170 nextrow = buf_ri_k + merge->nrecv; 3171 nextai = nextrow + merge->nrecv; 3172 for (k=0; k<merge->nrecv; k++){ 3173 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3174 nrows = *buf_ri_k[k]; 3175 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3176 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3177 } 3178 3179 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3180 len = 0; 3181 for (i=0;i<m;i++) { 3182 bnzi = 0; 3183 /* add local non-zero cols of this proc's seqmat into lnk */ 3184 arow = owners[rank] + i; 3185 anzi = ai[arow+1] - ai[arow]; 3186 aj = a->j + ai[arow]; 3187 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3188 bnzi += nlnk; 3189 /* add received col data into lnk */ 3190 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3191 if (i == *nextrow[k]) { /* i-th row */ 3192 anzi = *(nextai[k]+1) - *nextai[k]; 3193 aj = buf_rj[k] + *nextai[k]; 3194 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3195 bnzi += nlnk; 3196 nextrow[k]++; nextai[k]++; 3197 } 3198 } 3199 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3200 3201 /* if free space is not available, make more free space */ 3202 if (current_space->local_remaining<bnzi) { 3203 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3204 nspacedouble++; 3205 } 3206 /* copy data into free space, then initialize lnk */ 3207 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3208 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3209 3210 current_space->array += bnzi; 3211 current_space->local_used += bnzi; 3212 current_space->local_remaining -= bnzi; 3213 3214 bi[i+1] = bi[i] + bnzi; 3215 } 3216 3217 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3218 3219 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3220 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3221 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3222 3223 /* create symbolic parallel matrix B_mpi */ 3224 /*---------------------------------------*/ 3225 if (n==PETSC_DECIDE) { 3226 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3227 } else { 3228 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3229 } 3230 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3231 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3232 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3233 3234 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3235 B_mpi->assembled = PETSC_FALSE; 3236 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3237 merge->bi = bi; 3238 merge->bj = bj; 3239 merge->buf_ri = buf_ri; 3240 merge->buf_rj = buf_rj; 3241 merge->coi = PETSC_NULL; 3242 merge->coj = PETSC_NULL; 3243 merge->owners_co = PETSC_NULL; 3244 3245 /* attach the supporting struct to B_mpi for reuse */ 3246 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3247 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3248 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3249 *mpimat = B_mpi; 3250 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3251 PetscFunctionReturn(0); 3252 } 3253 3254 static PetscEvent logkey_seqstompi = 0; 3255 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3256 { 3257 PetscErrorCode ierr; 3258 3259 PetscFunctionBegin; 3260 if (!logkey_seqstompi) { 3261 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3262 } 3263 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3264 if (scall == MAT_INITIAL_MATRIX){ 3265 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3266 } 3267 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3268 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3269 PetscFunctionReturn(0); 3270 } 3271 static PetscEvent logkey_getlocalmat = 0; 3272 #undef __FUNCT__ 3273 #define __FUNCT__ "MatGetLocalMat" 3274 /*@C 3275 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3276 3277 Not Collective 3278 3279 Input Parameters: 3280 + A - the matrix 3281 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3282 3283 Output Parameter: 3284 . A_loc - the local sequential matrix generated 3285 3286 Level: developer 3287 3288 @*/ 3289 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3290 { 3291 PetscErrorCode ierr; 3292 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3293 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3294 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3295 PetscScalar *aa=a->a,*ba=b->a,*ca; 3296 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3297 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3298 3299 PetscFunctionBegin; 3300 if (!logkey_getlocalmat) { 3301 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3302 } 3303 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3304 if (scall == MAT_INITIAL_MATRIX){ 3305 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3306 ci[0] = 0; 3307 for (i=0; i<am; i++){ 3308 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3309 } 3310 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3311 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3312 k = 0; 3313 for (i=0; i<am; i++) { 3314 ncols_o = bi[i+1] - bi[i]; 3315 ncols_d = ai[i+1] - ai[i]; 3316 /* off-diagonal portion of A */ 3317 for (jo=0; jo<ncols_o; jo++) { 3318 col = cmap[*bj]; 3319 if (col >= cstart) break; 3320 cj[k] = col; bj++; 3321 ca[k++] = *ba++; 3322 } 3323 /* diagonal portion of A */ 3324 for (j=0; j<ncols_d; j++) { 3325 cj[k] = cstart + *aj++; 3326 ca[k++] = *aa++; 3327 } 3328 /* off-diagonal portion of A */ 3329 for (j=jo; j<ncols_o; j++) { 3330 cj[k] = cmap[*bj++]; 3331 ca[k++] = *ba++; 3332 } 3333 } 3334 /* put together the new matrix */ 3335 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3336 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3337 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3338 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3339 mat->freedata = PETSC_TRUE; 3340 mat->nonew = 0; 3341 } else if (scall == MAT_REUSE_MATRIX){ 3342 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3343 ci = mat->i; cj = mat->j; ca = mat->a; 3344 for (i=0; i<am; i++) { 3345 /* off-diagonal portion of A */ 3346 ncols_o = bi[i+1] - bi[i]; 3347 for (jo=0; jo<ncols_o; jo++) { 3348 col = cmap[*bj]; 3349 if (col >= cstart) break; 3350 *ca++ = *ba++; bj++; 3351 } 3352 /* diagonal portion of A */ 3353 ncols_d = ai[i+1] - ai[i]; 3354 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3355 /* off-diagonal portion of A */ 3356 for (j=jo; j<ncols_o; j++) { 3357 *ca++ = *ba++; bj++; 3358 } 3359 } 3360 } else { 3361 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3362 } 3363 3364 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3365 PetscFunctionReturn(0); 3366 } 3367 3368 static PetscEvent logkey_getlocalmatcondensed = 0; 3369 #undef __FUNCT__ 3370 #define __FUNCT__ "MatGetLocalMatCondensed" 3371 /*@C 3372 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3373 3374 Not Collective 3375 3376 Input Parameters: 3377 + A - the matrix 3378 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3379 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3380 3381 Output Parameter: 3382 . A_loc - the local sequential matrix generated 3383 3384 Level: developer 3385 3386 @*/ 3387 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3388 { 3389 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3390 PetscErrorCode ierr; 3391 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3392 IS isrowa,iscola; 3393 Mat *aloc; 3394 3395 PetscFunctionBegin; 3396 if (!logkey_getlocalmatcondensed) { 3397 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3398 } 3399 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3400 if (!row){ 3401 start = a->rstart; end = a->rend; 3402 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3403 } else { 3404 isrowa = *row; 3405 } 3406 if (!col){ 3407 start = a->cstart; 3408 cmap = a->garray; 3409 nzA = a->A->n; 3410 nzB = a->B->n; 3411 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3412 ncols = 0; 3413 for (i=0; i<nzB; i++) { 3414 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3415 else break; 3416 } 3417 imark = i; 3418 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3419 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3420 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3421 ierr = PetscFree(idx);CHKERRQ(ierr); 3422 } else { 3423 iscola = *col; 3424 } 3425 if (scall != MAT_INITIAL_MATRIX){ 3426 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3427 aloc[0] = *A_loc; 3428 } 3429 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3430 *A_loc = aloc[0]; 3431 ierr = PetscFree(aloc);CHKERRQ(ierr); 3432 if (!row){ 3433 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3434 } 3435 if (!col){ 3436 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3437 } 3438 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3439 PetscFunctionReturn(0); 3440 } 3441 3442 static PetscEvent logkey_GetBrowsOfAcols = 0; 3443 #undef __FUNCT__ 3444 #define __FUNCT__ "MatGetBrowsOfAcols" 3445 /*@C 3446 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3447 3448 Collective on Mat 3449 3450 Input Parameters: 3451 + A,B - the matrices in mpiaij format 3452 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3453 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3454 3455 Output Parameter: 3456 + rowb, colb - index sets of rows and columns of B to extract 3457 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3458 - B_seq - the sequential matrix generated 3459 3460 Level: developer 3461 3462 @*/ 3463 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3464 { 3465 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3466 PetscErrorCode ierr; 3467 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3468 IS isrowb,iscolb; 3469 Mat *bseq; 3470 3471 PetscFunctionBegin; 3472 if (a->cstart != b->rstart || a->cend != b->rend){ 3473 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3474 } 3475 if (!logkey_GetBrowsOfAcols) { 3476 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3477 } 3478 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3479 3480 if (scall == MAT_INITIAL_MATRIX){ 3481 start = a->cstart; 3482 cmap = a->garray; 3483 nzA = a->A->n; 3484 nzB = a->B->n; 3485 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3486 ncols = 0; 3487 for (i=0; i<nzB; i++) { /* row < local row index */ 3488 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3489 else break; 3490 } 3491 imark = i; 3492 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3493 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3494 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3495 ierr = PetscFree(idx);CHKERRQ(ierr); 3496 *brstart = imark; 3497 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3498 } else { 3499 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3500 isrowb = *rowb; iscolb = *colb; 3501 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3502 bseq[0] = *B_seq; 3503 } 3504 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3505 *B_seq = bseq[0]; 3506 ierr = PetscFree(bseq);CHKERRQ(ierr); 3507 if (!rowb){ 3508 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3509 } else { 3510 *rowb = isrowb; 3511 } 3512 if (!colb){ 3513 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3514 } else { 3515 *colb = iscolb; 3516 } 3517 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3518 PetscFunctionReturn(0); 3519 } 3520 3521 static PetscEvent logkey_GetBrowsOfAocols = 0; 3522 #undef __FUNCT__ 3523 #define __FUNCT__ "MatGetBrowsOfAoCols" 3524 /*@C 3525 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3526 of the OFF-DIAGONAL portion of local A 3527 3528 Collective on Mat 3529 3530 Input Parameters: 3531 + A,B - the matrices in mpiaij format 3532 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3533 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3534 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3535 3536 Output Parameter: 3537 + B_oth - the sequential matrix generated 3538 3539 Level: developer 3540 3541 @*/ 3542 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3543 { 3544 VecScatter_MPI_General *gen_to,*gen_from; 3545 PetscErrorCode ierr; 3546 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3547 Mat_SeqAIJ *b_oth; 3548 VecScatter ctx=a->Mvctx; 3549 MPI_Comm comm=ctx->comm; 3550 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3551 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3552 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3553 PetscInt i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len; 3554 MPI_Request *rwaits,*swaits; 3555 MPI_Status *sstatus,rstatus; 3556 const PetscInt *cols; 3557 const PetscScalar *vals; 3558 PetscMPIInt j; 3559 3560 PetscFunctionBegin; 3561 if (a->cstart != b->rstart || a->cend != b->rend){ 3562 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3563 } 3564 if (!logkey_GetBrowsOfAocols) { 3565 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3566 } 3567 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3568 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3569 3570 gen_to = (VecScatter_MPI_General*)ctx->todata; 3571 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3572 rvalues = gen_from->values; /* holds the length of sending row */ 3573 svalues = gen_to->values; /* holds the length of receiving row */ 3574 nrecvs = gen_from->n; 3575 nsends = gen_to->n; 3576 rwaits = gen_from->requests; 3577 swaits = gen_to->requests; 3578 rrow = gen_from->indices; /* local row index to be received */ 3579 srow = gen_to->indices; /* local row index to be sent */ 3580 rstarts = gen_from->starts; 3581 sstarts = gen_to->starts; 3582 rprocs = gen_from->procs; 3583 sprocs = gen_to->procs; 3584 sstatus = gen_to->sstatus; 3585 3586 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3587 if (scall == MAT_INITIAL_MATRIX){ 3588 /* i-array */ 3589 /*---------*/ 3590 /* post receives */ 3591 for (i=0; i<nrecvs; i++){ 3592 rowlen = (PetscInt*)rvalues + rstarts[i]; 3593 nrows = rstarts[i+1]-rstarts[i]; 3594 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3595 } 3596 3597 /* pack the outgoing message */ 3598 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3599 rstartsj = sstartsj + nsends +1; 3600 sstartsj[0] = 0; rstartsj[0] = 0; 3601 len = 0; /* total length of j or a array to be sent */ 3602 k = 0; 3603 for (i=0; i<nsends; i++){ 3604 rowlen = (PetscInt*)svalues + sstarts[i]; 3605 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3606 for (j=0; j<nrows; j++) { 3607 row = srow[k] + b->rowners[rank]; /* global row idx */ 3608 ierr = MatGetRow(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3609 len += rowlen[j]; 3610 ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3611 k++; 3612 } 3613 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3614 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3615 } 3616 /* recvs and sends of i-array are completed */ 3617 i = nrecvs; 3618 while (i--) { 3619 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3620 } 3621 if (nsends) { 3622 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3623 } 3624 /* allocate buffers for sending j and a arrays */ 3625 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3626 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3627 3628 /* create i-array of B_oth */ 3629 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3630 b_othi[0] = 0; 3631 len = 0; /* total length of j or a array to be received */ 3632 k = 0; 3633 for (i=0; i<nrecvs; i++){ 3634 rowlen = (PetscInt*)rvalues + rstarts[i]; 3635 nrows = rstarts[i+1]-rstarts[i]; 3636 for (j=0; j<nrows; j++) { 3637 b_othi[k+1] = b_othi[k] + rowlen[j]; 3638 len += rowlen[j]; k++; 3639 } 3640 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3641 } 3642 3643 /* allocate space for j and a arrrays of B_oth */ 3644 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3645 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3646 3647 /* j-array */ 3648 /*---------*/ 3649 /* post receives of j-array */ 3650 for (i=0; i<nrecvs; i++){ 3651 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3652 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3653 } 3654 k = 0; 3655 for (i=0; i<nsends; i++){ 3656 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3657 bufJ = bufj+sstartsj[i]; 3658 for (j=0; j<nrows; j++) { 3659 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3660 ierr = MatGetRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3661 for (l=0; l<ncols; l++){ 3662 *bufJ++ = cols[l]; 3663 } 3664 ierr = MatRestoreRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3665 } 3666 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3667 } 3668 3669 /* recvs and sends of j-array are completed */ 3670 i = nrecvs; 3671 while (i--) { 3672 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3673 } 3674 if (nsends) { 3675 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3676 } 3677 } else if (scall == MAT_REUSE_MATRIX){ 3678 sstartsj = *startsj; 3679 rstartsj = sstartsj + nsends +1; 3680 bufa = *bufa_ptr; 3681 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3682 b_otha = b_oth->a; 3683 } else { 3684 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3685 } 3686 3687 /* a-array */ 3688 /*---------*/ 3689 /* post receives of a-array */ 3690 for (i=0; i<nrecvs; i++){ 3691 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3692 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3693 } 3694 k = 0; 3695 for (i=0; i<nsends; i++){ 3696 nrows = sstarts[i+1]-sstarts[i]; 3697 bufA = bufa+sstartsj[i]; 3698 for (j=0; j<nrows; j++) { 3699 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3700 ierr = MatGetRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3701 for (l=0; l<ncols; l++){ 3702 *bufA++ = vals[l]; 3703 } 3704 ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3705 3706 } 3707 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3708 } 3709 /* recvs and sends of a-array are completed */ 3710 i = nrecvs; 3711 while (i--) { 3712 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3713 } 3714 if (nsends) { 3715 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3716 } 3717 3718 if (scall == MAT_INITIAL_MATRIX){ 3719 /* put together the new matrix */ 3720 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3721 3722 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3723 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3724 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3725 b_oth->freedata = PETSC_TRUE; 3726 b_oth->nonew = 0; 3727 3728 ierr = PetscFree(bufj);CHKERRQ(ierr); 3729 if (!startsj || !bufa_ptr){ 3730 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3731 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3732 } else { 3733 *startsj = sstartsj; 3734 *bufa_ptr = bufa; 3735 } 3736 } 3737 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3738 3739 PetscFunctionReturn(0); 3740 } 3741 3742 /*MC 3743 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3744 3745 Options Database Keys: 3746 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3747 3748 Level: beginner 3749 3750 .seealso: MatCreateMPIAIJ 3751 M*/ 3752 3753 EXTERN_C_BEGIN 3754 #undef __FUNCT__ 3755 #define __FUNCT__ "MatCreate_MPIAIJ" 3756 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3757 { 3758 Mat_MPIAIJ *b; 3759 PetscErrorCode ierr; 3760 PetscInt i; 3761 PetscMPIInt size; 3762 3763 PetscFunctionBegin; 3764 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3765 3766 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3767 B->data = (void*)b; 3768 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3769 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3770 B->factor = 0; 3771 B->assembled = PETSC_FALSE; 3772 B->mapping = 0; 3773 3774 B->insertmode = NOT_SET_VALUES; 3775 b->size = size; 3776 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3777 3778 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3779 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3780 3781 /* the information in the maps duplicates the information computed below, eventually 3782 we should remove the duplicate information that is not contained in the maps */ 3783 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3784 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3785 3786 /* build local table of row and column ownerships */ 3787 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3788 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3789 b->cowners = b->rowners + b->size + 2; 3790 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3791 b->rowners[0] = 0; 3792 for (i=2; i<=b->size; i++) { 3793 b->rowners[i] += b->rowners[i-1]; 3794 } 3795 b->rstart = b->rowners[b->rank]; 3796 b->rend = b->rowners[b->rank+1]; 3797 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3798 b->cowners[0] = 0; 3799 for (i=2; i<=b->size; i++) { 3800 b->cowners[i] += b->cowners[i-1]; 3801 } 3802 b->cstart = b->cowners[b->rank]; 3803 b->cend = b->cowners[b->rank+1]; 3804 3805 /* build cache for off array entries formed */ 3806 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3807 b->donotstash = PETSC_FALSE; 3808 b->colmap = 0; 3809 b->garray = 0; 3810 b->roworiented = PETSC_TRUE; 3811 3812 /* stuff used for matrix vector multiply */ 3813 b->lvec = PETSC_NULL; 3814 b->Mvctx = PETSC_NULL; 3815 3816 /* stuff for MatGetRow() */ 3817 b->rowindices = 0; 3818 b->rowvalues = 0; 3819 b->getrowactive = PETSC_FALSE; 3820 3821 /* Explicitly create 2 MATSEQAIJ matrices. */ 3822 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3823 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3824 PetscLogObjectParent(B,b->A); 3825 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3826 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3827 PetscLogObjectParent(B,b->B); 3828 3829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3830 "MatStoreValues_MPIAIJ", 3831 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3832 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3833 "MatRetrieveValues_MPIAIJ", 3834 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3836 "MatGetDiagonalBlock_MPIAIJ", 3837 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3838 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3839 "MatIsTranspose_MPIAIJ", 3840 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3842 "MatMPIAIJSetPreallocation_MPIAIJ", 3843 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3844 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3845 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3846 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3848 "MatDiagonalScaleLocal_MPIAIJ", 3849 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3850 PetscFunctionReturn(0); 3851 } 3852 EXTERN_C_END 3853