#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each
  process stores an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
  if (nonew == 1) goto a_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
  N = nrow1++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
  if (nonew == 1) goto b_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
  N = nrow2++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const
PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 110 PetscScalar value; 111 PetscErrorCode ierr; 112 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 113 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 114 PetscTruth roworiented = aij->roworiented; 115 116 /* Some Variables required in the macro */ 117 Mat A = aij->A; 118 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 119 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 120 PetscScalar *aa = a->a; 121 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 122 Mat B = aij->B; 123 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 124 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 125 PetscScalar *ba = b->a; 126 127 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 128 PetscInt nonew = a->nonew; 129 PetscScalar *ap1,*ap2; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (im[i] < 0) continue; 134 #if defined(PETSC_USE_DEBUG) 135 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 136 #endif 137 if (im[i] >= rstart && im[i] < rend) { 138 row = im[i] - rstart; 139 lastcol1 = -1; 140 rp1 = aj + ai[row]; 141 ap1 = aa + ai[row]; 142 rmax1 = aimax[row]; 143 nrow1 = ailen[row]; 144 low1 = 0; 145 high1 = nrow1; 146 lastcol2 = -1; 147 rp2 = bj + bi[row]; 148 ap2 = ba + bi[row]; 149 rmax2 = bimax[row]; 150 nrow2 = bilen[row]; 151 low2 = 0; 152 high2 = nrow2; 153 154 for (j=0; j<n; j++) { 155 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 156 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 157 if (in[j] >= cstart && in[j] < cend){ 158 col = in[j] - cstart; 159 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 160 } else if (in[j] < 0) continue; 161 #if defined(PETSC_USE_DEBUG) 162 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 163 #endif 164 else { 165 if (mat->was_assembled) { 166 if (!aij->colmap) { 167 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 168 } 169 #if defined (PETSC_USE_CTABLE) 170 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 171 col--; 172 #else 173 col = aij->colmap[in[j]] - 1; 174 #endif 175 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 176 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 177 col = in[j]; 178 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 179 B = aij->B; 180 b = (Mat_SeqAIJ*)B->data; 181 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 182 rp2 = bj + bi[row]; 183 ap2 = ba + bi[row]; 184 rmax2 = bimax[row]; 185 nrow2 = bilen[row]; 186 low2 = 0; 187 high2 = nrow2; 188 bm = aij->B->m; 189 ba = b->a; 190 } 191 } else col = in[j]; 192 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 193 } 194 } 195 } else { 196 if (!aij->donotstash) { 197 if (roworiented) { 198 if (ignorezeroentries && v[i*n] == 0.0) continue; 199 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 200 } else { 201 if (ignorezeroentries && v[i] == 0.0) continue; 202 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 203 } 204 } 205 } 206 } 207 PetscFunctionReturn(0); 208 } 209 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatGetValues_MPIAIJ" 213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 214 { 215 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 216 PetscErrorCode ierr; 217 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 218 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 219 220 PetscFunctionBegin; 221 for (i=0; i<m; i++) { 222 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 223 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 224 if (idxm[i] >= rstart && idxm[i] < rend) { 225 row = idxm[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 228 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 229 if (idxn[j] >= cstart && idxn[j] < cend){ 230 col = idxn[j] - cstart; 231 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 232 } else { 233 if (!aij->colmap) { 234 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 235 } 236 #if defined (PETSC_USE_CTABLE) 237 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 238 col --; 239 #else 240 col = aij->colmap[idxn[j]] - 1; 241 #endif 242 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 243 else { 244 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 245 } 246 } 247 } 248 } else { 249 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 250 } 251 } 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 258 { 259 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 260 PetscErrorCode ierr; 261 PetscInt nstash,reallocs; 262 InsertMode addv; 263 264 PetscFunctionBegin; 265 if (aij->donotstash) { 266 PetscFunctionReturn(0); 267 } 268 269 /* make sure all processors are either in INSERTMODE or ADDMODE */ 270 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 271 if (addv == (ADD_VALUES|INSERT_VALUES)) { 272 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 273 } 274 mat->insertmode = addv; /* in case this processor had no cache */ 275 276 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 277 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 278 ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 288 PetscErrorCode ierr; 289 PetscMPIInt n; 290 PetscInt i,j,rstart,ncols,flg; 291 PetscInt *row,*col,other_disassembled; 292 PetscScalar *val; 293 InsertMode addv = mat->insertmode; 294 295 PetscFunctionBegin; 296 if (!aij->donotstash) { 297 while (1) { 298 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 299 if (!flg) break; 300 301 for (i=0; i<n;) { 302 /* Now identify the consecutive vals belonging to the same row */ 303 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 304 if (j < n) ncols = j-i; 305 else ncols = n-i; 306 /* Now assemble all these values with a single function call */ 307 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 308 i = j; 309 } 310 } 311 ierr = 
MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we can reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* refetch b because aij->B may have been changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ*)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post
     receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time, causing many mallocs and slowing things down.
481 482 Contributed by: Matthew Knepley 483 */ 484 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 485 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 486 if ((diag != 0.0) && (l->A->M == l->A->N)) { 487 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 488 } else if (diag != 0.0) { 489 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 490 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 491 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 492 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 493 } 494 for (i = 0; i < slen; i++) { 495 row = lrows[i] + rstart; 496 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 497 } 498 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 499 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 500 } else { 501 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 502 } 503 ierr = PetscFree(lrows);CHKERRQ(ierr); 504 505 /* wait on sends */ 506 if (nsends) { 507 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 508 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 509 ierr = PetscFree(send_status);CHKERRQ(ierr); 510 } 511 ierr = PetscFree(send_waits);CHKERRQ(ierr); 512 ierr = PetscFree(svalues);CHKERRQ(ierr); 513 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatMult_MPIAIJ" 519 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 520 { 521 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 522 PetscErrorCode ierr; 523 PetscInt nt; 524 525 PetscFunctionBegin; 526 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 527 if (nt != A->n) { 528 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 529 } 530 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 531 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 532 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 533 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 534 PetscFunctionReturn(0); 535 } 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatMultAdd_MPIAIJ" 539 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 540 { 541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 542 PetscErrorCode ierr; 543 544 PetscFunctionBegin; 545 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 546 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 547 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 548 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 554 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 555 { 556 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 557 PetscErrorCode ierr; 558 PetscTruth merged; 559 560 PetscFunctionBegin; 561 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 562 /* do nondiagonal part */ 563 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 564 if (!merged) { 565 /* send it on its way */ 566 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 567 /* do local part */ 568 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 569 /* receive remote parts: note this assumes the values are not actually */ 570 /* added in yy until the next line, */ 571 ierr = 
VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 572 } else { 573 /* do local part */ 574 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 575 /* send it on its way */ 576 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 577 /* values actually were received in the Begin() but we need to call this nop */ 578 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 579 } 580 PetscFunctionReturn(0); 581 } 582 583 EXTERN_C_BEGIN 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 586 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 587 { 588 MPI_Comm comm; 589 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 590 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 591 IS Me,Notme; 592 PetscErrorCode ierr; 593 PetscInt M,N,first,last,*notme,i; 594 PetscMPIInt size; 595 596 PetscFunctionBegin; 597 598 /* Easy test: symmetric diagonal block */ 599 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 600 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 601 if (!*f) PetscFunctionReturn(0); 602 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 603 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 604 if (size == 1) PetscFunctionReturn(0); 605 606 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 607 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 608 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 609 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 610 for (i=0; i<first; i++) notme[i] = i; 611 for (i=last; i<M; i++) notme[i-last+first] = i; 612 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 613 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 614 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 615 Aoff = Aoffs[0]; 616 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 617 Boff = Boffs[0]; 618 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 619 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 620 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 621 ierr = ISDestroy(Me);CHKERRQ(ierr); 622 ierr = ISDestroy(Notme);CHKERRQ(ierr); 623 624 PetscFunctionReturn(0); 625 } 626 EXTERN_C_END 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 630 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 631 { 632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 633 PetscErrorCode ierr; 634 635 PetscFunctionBegin; 636 /* do nondiagonal part */ 637 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 638 /* send it on its way */ 639 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 640 /* do local part */ 641 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 642 /* receive remote parts */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 653 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 PetscErrorCode ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if 
(a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatScale_MPIAIJ" 669 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 PetscErrorCode ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 676 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatDestroy_MPIAIJ" 682 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 706 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 707 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 708 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 709 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 710 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 711 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 712 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatView_MPIAIJ_Binary" 718 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 722 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 723 PetscErrorCode ierr; 724 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 725 int fd; 726 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 727 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 728 PetscScalar *column_values; 729 730 PetscFunctionBegin; 731 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 732 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 733 nz = A->nz + B->nz; 734 if (!rank) { 735 header[0] = MAT_FILE_COOKIE; 736 header[1] = mat->M; 737 header[2] = mat->N; 738 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 739 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 740 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 741 /* get largest number of rows any processor has */ 742 rlen = mat->m; 
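  /* Descriptive summary of the write that follows (rank 0 drives it).  The
     binary file produced by this routine is laid out as
         header[4]        = {MAT_FILE_COOKIE, M, N, global number of nonzeros}
         row lengths      - M entries, nonzeros per global row
         column indices   - one entry per nonzero, global numbering, row by row
         numerical values - one entry per nonzero, in the same order
     Rank 0 writes its own portion first and then receives and appends each
     other process's row lengths, indices and values in rank order. */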
743 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 744 for (i=1; i<size; i++) { 745 rlen = PetscMax(rlen,range[i+1] - range[i]); 746 } 747 } else { 748 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 749 rlen = mat->m; 750 } 751 752 /* load up the local row counts */ 753 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 754 for (i=0; i<mat->m; i++) { 755 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 756 } 757 758 /* store the row lengths to the file */ 759 if (!rank) { 760 MPI_Status status; 761 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 762 for (i=1; i<size; i++) { 763 rlen = range[i+1] - range[i]; 764 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 766 } 767 } else { 768 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 769 } 770 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 771 772 /* load up the local column indices */ 773 nzmax = nz; /* )th processor needs space a largest processor needs */ 774 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 775 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 776 cnt = 0; 777 for (i=0; i<mat->m; i++) { 778 for (j=B->i[i]; j<B->i[i+1]; j++) { 779 if ( (col = garray[B->j[j]]) > cstart) break; 780 column_indices[cnt++] = col; 781 } 782 for (k=A->i[i]; k<A->i[i+1]; k++) { 783 column_indices[cnt++] = A->j[k] + cstart; 784 } 785 for (; j<B->i[i+1]; j++) { 786 column_indices[cnt++] = garray[B->j[j]]; 787 } 788 } 789 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 790 791 /* store the column indices to the file */ 792 if (!rank) { 793 MPI_Status status; 794 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 795 for (i=1; i<size; i++) { 796 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 797 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 798 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 799 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 800 } 801 } else { 802 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 803 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 804 } 805 ierr = PetscFree(column_indices);CHKERRQ(ierr); 806 807 /* load up the local column values */ 808 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 809 cnt = 0; 810 for (i=0; i<mat->m; i++) { 811 for (j=B->i[i]; j<B->i[i+1]; j++) { 812 if ( garray[B->j[j]] > cstart) break; 813 column_values[cnt++] = B->a[j]; 814 } 815 for (k=A->i[i]; k<A->i[i+1]; k++) { 816 column_values[cnt++] = A->a[k]; 817 } 818 for (; j<B->i[i+1]; j++) { 819 column_values[cnt++] = B->a[j]; 820 } 821 } 822 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 823 824 /* store the column values to the file */ 825 if (!rank) { 826 MPI_Status status; 827 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 828 for (i=1; i<size; i++) { 829 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 830 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 831 ierr = 
MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 832 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 833 } 834 } else { 835 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 836 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(column_values);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 844 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 845 { 846 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 847 PetscErrorCode ierr; 848 PetscMPIInt rank = aij->rank,size = aij->size; 849 PetscTruth isdraw,iascii,isbinary; 850 PetscViewer sviewer; 851 PetscViewerFormat format; 852 853 PetscFunctionBegin; 854 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 855 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 856 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 857 if (iascii) { 858 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 859 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 860 MatInfo info; 861 PetscTruth inodes; 862 863 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 864 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 865 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 866 if (!inodes) { 867 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 868 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 869 } else { 870 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 871 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 872 } 873 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 874 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 875 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 876 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 877 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 878 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } else if (format == PETSC_VIEWER_ASCII_INFO) { 881 PetscInt inodecount,inodelimit,*inodes; 882 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 883 if (inodes) { 884 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 885 } else { 886 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 887 } 888 PetscFunctionReturn(0); 889 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 890 PetscFunctionReturn(0); 891 } 892 } else if (isbinary) { 893 if (size == 1) { 894 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 895 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 896 } else { 897 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } else if (isdraw) { 901 PetscDraw draw; 902 PetscTruth isnull; 903 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 904 
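  /* A null draw context (for example when the user runs with -draw_type null)
     means there is nothing to render, so the isnull test below lets every
     process return early before any drawing work is done. */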
ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 905 } 906 907 if (size == 1) { 908 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 909 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 910 } else { 911 /* assemble the entire matrix onto first processor. */ 912 Mat A; 913 Mat_SeqAIJ *Aloc; 914 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 915 PetscScalar *a; 916 917 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 918 if (!rank) { 919 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 920 } else { 921 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 922 } 923 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 924 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 925 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 926 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 927 928 /* copy over the A part */ 929 Aloc = (Mat_SeqAIJ*)aij->A->data; 930 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 931 row = aij->rstart; 932 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 933 for (i=0; i<m; i++) { 934 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 935 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 936 } 937 aj = Aloc->j; 938 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 939 940 /* copy over the B part */ 941 Aloc = (Mat_SeqAIJ*)aij->B->data; 942 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 943 row = aij->rstart; 944 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 945 ct = cols; 946 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 947 for (i=0; i<m; i++) { 948 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 949 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 950 } 951 ierr = PetscFree(ct);CHKERRQ(ierr); 952 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 954 /* 955 Everyone has to call to draw the matrix since the graphics waits are 956 synchronized across all processors that share the PetscDraw object 957 */ 958 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 959 if (!rank) { 960 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 961 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 962 } 963 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 964 ierr = MatDestroy(A);CHKERRQ(ierr); 965 } 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatView_MPIAIJ" 971 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 972 { 973 PetscErrorCode ierr; 974 PetscTruth iascii,isdraw,issocket,isbinary; 975 976 PetscFunctionBegin; 977 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 978 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 979 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 980 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 981 if (iascii || isdraw || isbinary || issocket) { 982 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 983 } else { 984 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 985 } 986 PetscFunctionReturn(0); 987 } 988 989 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatRelax_MPIAIJ" 993 PetscErrorCode MatRelax_MPIAIJ(Mat 
matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 994 { 995 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 996 PetscErrorCode ierr; 997 Vec bb1; 998 999 PetscFunctionBegin; 1000 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1001 1002 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1003 1004 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1005 if (flag & SOR_ZERO_INITIAL_GUESS) { 1006 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1007 its--; 1008 } 1009 1010 while (its--) { 1011 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1012 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1013 1014 /* update rhs: bb1 = bb - B*x */ 1015 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1016 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1017 1018 /* local sweep */ 1019 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1020 CHKERRQ(ierr); 1021 } 1022 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1023 if (flag & SOR_ZERO_INITIAL_GUESS) { 1024 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1025 its--; 1026 } 1027 while (its--) { 1028 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1030 1031 /* update rhs: bb1 = bb - B*x */ 1032 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1033 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1034 1035 /* local sweep */ 1036 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1037 CHKERRQ(ierr); 1038 } 1039 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1040 if (flag & SOR_ZERO_INITIAL_GUESS) { 1041 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1042 its--; 1043 } 1044 while (its--) { 1045 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1046 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1047 1048 /* update rhs: bb1 = bb - B*x */ 1049 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1050 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1051 1052 /* local sweep */ 1053 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1054 CHKERRQ(ierr); 1055 } 1056 } else { 1057 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1058 } 1059 1060 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatPermute_MPIAIJ" 1066 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1067 { 1068 MPI_Comm comm,pcomm; 1069 PetscInt first,local_size,nrows,*rows; 1070 int ntids; 1071 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1072 PetscErrorCode ierr; 1073 1074 PetscFunctionBegin; 1075 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1076 /* make a collective version of 'rowp' */ 1077 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1078 if (pcomm==comm) { 1079 crowp = rowp; 1080 } else { 1081 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1082 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1083 ierr = 
ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1084 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1085 } 1086 /* collect the global row permutation and invert it */ 1087 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1088 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1089 if (pcomm!=comm) { 1090 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1091 } 1092 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1093 /* get the local target indices */ 1094 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1095 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1096 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1097 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1098 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1099 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1100 /* the column permutation is so much easier; 1101 make a local version of 'colp' and invert it */ 1102 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1103 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1104 if (ntids==1) { 1105 lcolp = colp; 1106 } else { 1107 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1108 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1109 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1110 } 1111 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1112 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1113 if (ntids>1) { 1114 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1115 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1116 } 1117 /* now we just get the submatrix */ 1118 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1119 /* clean up */ 1120 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1121 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1127 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1128 { 1129 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1130 Mat A = mat->A,B = mat->B; 1131 PetscErrorCode ierr; 1132 PetscReal isend[5],irecv[5]; 1133 1134 PetscFunctionBegin; 1135 info->block_size = 1.0; 1136 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1137 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1138 isend[3] = info->memory; isend[4] = info->mallocs; 1139 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1140 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1141 isend[3] += info->memory; isend[4] += info->mallocs; 1142 if (flag == MAT_LOCAL) { 1143 info->nz_used = isend[0]; 1144 info->nz_allocated = isend[1]; 1145 info->nz_unneeded = isend[2]; 1146 info->memory = isend[3]; 1147 info->mallocs = isend[4]; 1148 } else if (flag == MAT_GLOBAL_MAX) { 1149 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1150 info->nz_used = irecv[0]; 1151 info->nz_allocated = irecv[1]; 1152 info->nz_unneeded = irecv[2]; 1153 info->memory = irecv[3]; 1154 info->mallocs = irecv[4]; 1155 } else if (flag == MAT_GLOBAL_SUM) { 1156 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1157 info->nz_used = irecv[0]; 1158 info->nz_allocated = irecv[1]; 1159 info->nz_unneeded = irecv[2]; 1160 info->memory = irecv[3]; 1161 info->mallocs = irecv[4]; 1162 } 1163 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1164 info->fill_ratio_needed = 0; 1165 info->factor_mallocs = 0; 
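  /* The size fields that follow are taken directly from the matrix layout and
     need no reduction.  Note that columns_local is reported as the global
     column count, since any local row of an MPIAIJ matrix may couple to any
     column through the off-diagonal block B. */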
1166 info->rows_global = (double)matin->M; 1167 info->columns_global = (double)matin->N; 1168 info->rows_local = (double)matin->m; 1169 info->columns_local = (double)matin->N; 1170 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatSetOption_MPIAIJ" 1176 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1177 { 1178 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1179 PetscErrorCode ierr; 1180 1181 PetscFunctionBegin; 1182 switch (op) { 1183 case MAT_NO_NEW_NONZERO_LOCATIONS: 1184 case MAT_YES_NEW_NONZERO_LOCATIONS: 1185 case MAT_COLUMNS_UNSORTED: 1186 case MAT_COLUMNS_SORTED: 1187 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1188 case MAT_KEEP_ZEROED_ROWS: 1189 case MAT_NEW_NONZERO_LOCATION_ERR: 1190 case MAT_USE_INODES: 1191 case MAT_DO_NOT_USE_INODES: 1192 case MAT_IGNORE_ZERO_ENTRIES: 1193 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1194 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1195 break; 1196 case MAT_ROW_ORIENTED: 1197 a->roworiented = PETSC_TRUE; 1198 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1199 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1200 break; 1201 case MAT_ROWS_SORTED: 1202 case MAT_ROWS_UNSORTED: 1203 case MAT_YES_NEW_DIAGONALS: 1204 ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr); 1205 break; 1206 case MAT_COLUMN_ORIENTED: 1207 a->roworiented = PETSC_FALSE; 1208 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1209 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1210 break; 1211 case MAT_IGNORE_OFF_PROC_ENTRIES: 1212 a->donotstash = PETSC_TRUE; 1213 break; 1214 case MAT_NO_NEW_DIAGONALS: 1215 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1216 case MAT_SYMMETRIC: 1217 case MAT_STRUCTURALLY_SYMMETRIC: 1218 case MAT_HERMITIAN: 1219 case MAT_SYMMETRY_ETERNAL: 1220 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1221 break; 1222 case MAT_NOT_SYMMETRIC: 1223 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1224 case MAT_NOT_HERMITIAN: 1225 case MAT_NOT_SYMMETRY_ETERNAL: 1226 break; 1227 default: 1228 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1229 } 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatGetRow_MPIAIJ" 1235 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1236 { 1237 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1238 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1239 PetscErrorCode ierr; 1240 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1241 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1242 PetscInt *cmap,*idx_p; 1243 1244 PetscFunctionBegin; 1245 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1246 mat->getrowactive = PETSC_TRUE; 1247 1248 if (!mat->rowvalues && (idx || v)) { 1249 /* 1250 allocate enough space to hold information from the longest row. 
1251 */ 1252 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1253 PetscInt max = 1,tmp; 1254 for (i=0; i<matin->m; i++) { 1255 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1256 if (max < tmp) { max = tmp; } 1257 } 1258 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1259 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1260 } 1261 1262 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1263 lrow = row - rstart; 1264 1265 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1266 if (!v) {pvA = 0; pvB = 0;} 1267 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1268 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1269 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1270 nztot = nzA + nzB; 1271 1272 cmap = mat->garray; 1273 if (v || idx) { 1274 if (nztot) { 1275 /* Sort by increasing column numbers, assuming A and B already sorted */ 1276 PetscInt imark = -1; 1277 if (v) { 1278 *v = v_p = mat->rowvalues; 1279 for (i=0; i<nzB; i++) { 1280 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1281 else break; 1282 } 1283 imark = i; 1284 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1285 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1286 } 1287 if (idx) { 1288 *idx = idx_p = mat->rowindices; 1289 if (imark > -1) { 1290 for (i=0; i<imark; i++) { 1291 idx_p[i] = cmap[cworkB[i]]; 1292 } 1293 } else { 1294 for (i=0; i<nzB; i++) { 1295 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1296 else break; 1297 } 1298 imark = i; 1299 } 1300 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1301 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1302 } 1303 } else { 1304 if (idx) *idx = 0; 1305 if (v) *v = 0; 1306 } 1307 } 1308 *nz = nztot; 1309 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1310 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1316 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1317 { 1318 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1319 1320 PetscFunctionBegin; 1321 if (!aij->getrowactive) { 1322 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1323 } 1324 aij->getrowactive = PETSC_FALSE; 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatNorm_MPIAIJ" 1330 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1331 { 1332 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1333 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1334 PetscErrorCode ierr; 1335 PetscInt i,j,cstart = aij->cstart; 1336 PetscReal sum = 0.0; 1337 PetscScalar *v; 1338 1339 PetscFunctionBegin; 1340 if (aij->size == 1) { 1341 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1342 } else { 1343 if (type == NORM_FROBENIUS) { 1344 v = amat->a; 1345 for (i=0; i<amat->nz; i++) { 1346 #if defined(PETSC_USE_COMPLEX) 1347 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1348 #else 1349 sum += (*v)*(*v); v++; 1350 #endif 1351 } 1352 v = bmat->a; 1353 for (i=0; i<bmat->nz; i++) { 1354 #if defined(PETSC_USE_COMPLEX) 1355 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1356 #else 1357 sum += (*v)*(*v); v++; 1358 #endif 1359 } 1360 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1361 *norm = sqrt(*norm); 1362 } else 
if (type == NORM_1) { /* max column norm */ 1363 PetscReal *tmp,*tmp2; 1364 PetscInt *jj,*garray = aij->garray; 1365 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1366 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1367 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1368 *norm = 0.0; 1369 v = amat->a; jj = amat->j; 1370 for (j=0; j<amat->nz; j++) { 1371 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1372 } 1373 v = bmat->a; jj = bmat->j; 1374 for (j=0; j<bmat->nz; j++) { 1375 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1376 } 1377 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1378 for (j=0; j<mat->N; j++) { 1379 if (tmp2[j] > *norm) *norm = tmp2[j]; 1380 } 1381 ierr = PetscFree(tmp);CHKERRQ(ierr); 1382 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1383 } else if (type == NORM_INFINITY) { /* max row norm */ 1384 PetscReal ntemp = 0.0; 1385 for (j=0; j<aij->A->m; j++) { 1386 v = amat->a + amat->i[j]; 1387 sum = 0.0; 1388 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1389 sum += PetscAbsScalar(*v); v++; 1390 } 1391 v = bmat->a + bmat->i[j]; 1392 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1393 sum += PetscAbsScalar(*v); v++; 1394 } 1395 if (sum > ntemp) ntemp = sum; 1396 } 1397 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1398 } else { 1399 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1400 } 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatTranspose_MPIAIJ" 1407 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1408 { 1409 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1410 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1411 PetscErrorCode ierr; 1412 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1413 Mat B; 1414 PetscScalar *array; 1415 1416 PetscFunctionBegin; 1417 if (!matout && M != N) { 1418 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1419 } 1420 1421 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1422 ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr); 1423 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1424 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1425 1426 /* copy over the A part */ 1427 Aloc = (Mat_SeqAIJ*)a->A->data; 1428 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1429 row = a->rstart; 1430 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1431 for (i=0; i<m; i++) { 1432 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1433 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1434 } 1435 aj = Aloc->j; 1436 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1437 1438 /* copy over the B part */ 1439 Aloc = (Mat_SeqAIJ*)a->B->data; 1440 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1441 row = a->rstart; 1442 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1443 ct = cols; 1444 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1445 for (i=0; i<m; i++) { 1446 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1447 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1448 } 1449 ierr = PetscFree(ct);CHKERRQ(ierr); 1450 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1451 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1452 if (matout) { 1453 *matout = B; 1454 } else { 1455 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1456 } 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 
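/*
   MatDiagonalScale_MPIAIJ computes mat <- diag(ll) * mat * diag(rr).  The left
   scaling ll is distributed like the rows, so it can be applied directly to
   both the diagonal block A and the off-diagonal block B; the right scaling rr
   is distributed like the columns, so the entries B needs are first gathered
   into lvec with the same scatter used by MatMult.

   A minimal caller-side sketch (the vector names here are illustrative only):

       Vec l,r;
       ierr = MatGetVecs(mat,&r,&l);CHKERRQ(ierr);      // r conforms to the columns, l to the rows
       // ... fill l and r with the desired scalings ...
       ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);  // either argument may be PETSC_NULL
*/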
1462 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1463 { 1464 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1465 Mat a = aij->A,b = aij->B; 1466 PetscErrorCode ierr; 1467 PetscInt s1,s2,s3; 1468 1469 PetscFunctionBegin; 1470 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1471 if (rr) { 1472 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1473 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1474 /* Overlap communication with computation. */ 1475 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1476 } 1477 if (ll) { 1478 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1479 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1480 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1481 } 1482 /* scale the diagonal block */ 1483 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1484 1485 if (rr) { 1486 /* Do a scatter end and then right scale the off-diagonal block */ 1487 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1488 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1489 } 1490 1491 PetscFunctionReturn(0); 1492 } 1493 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1497 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1498 { 1499 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1500 PetscErrorCode ierr; 1501 1502 PetscFunctionBegin; 1503 if (!a->rank) { 1504 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1511 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1512 { 1513 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1514 PetscErrorCode ierr; 1515 1516 PetscFunctionBegin; 1517 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1518 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1523 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1524 { 1525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatEqual_MPIAIJ" 1535 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1536 { 1537 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1538 Mat a,b,c,d; 1539 PetscTruth flg; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 a = matA->A; b = matA->B; 1544 c = matB->A; d = matB->B; 1545 1546 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1547 if (flg) { 1548 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1549 } 1550 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatCopy_MPIAIJ" 1556 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1557 { 1558 PetscErrorCode ierr; 1559 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1560 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1561 1562 PetscFunctionBegin; 1563 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1564 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1565 /* because of the column compression in the off-processor part of the matrix a->B, 1566 the number of columns in a->B and b->B may be different, hence we cannot call 1567 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1568 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1569 then copying the submatrices */ 1570 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1571 } else { 1572 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1573 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1580 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1581 { 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #include "petscblaslapack.h" 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatAXPY_MPIAIJ" 1592 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1593 { 1594 PetscErrorCode ierr; 1595 PetscInt i; 1596 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1597 PetscBLASInt bnz,one=1; 1598 Mat_SeqAIJ *x,*y; 1599 1600 PetscFunctionBegin; 1601 if (str == SAME_NONZERO_PATTERN) { 1602 PetscScalar alpha = a; 1603 x = (Mat_SeqAIJ *)xx->A->data; 1604 y = (Mat_SeqAIJ *)yy->A->data; 1605 bnz = (PetscBLASInt)x->nz; 1606 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1607 x = (Mat_SeqAIJ *)xx->B->data; 1608 y = (Mat_SeqAIJ *)yy->B->data; 1609 bnz = (PetscBLASInt)x->nz; 1610 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1611 } else if (str == SUBSET_NONZERO_PATTERN) { 1612 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1613 1614 x = (Mat_SeqAIJ *)xx->B->data; 1615 y = (Mat_SeqAIJ *)yy->B->data; 1616 if (y->xtoy && y->XtoY != xx->B) { 1617 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1618 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1619 } 1620 if (!y->xtoy) { /* get xtoy */ 1621 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1622 y->XtoY = xx->B; 1623 } 1624 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1625 } else { 1626 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1627 } 1628 PetscFunctionReturn(0); 1629 } 1630 1631 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatConjugate_MPIAIJ" 1635 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1636 { 1637 #if defined(PETSC_USE_COMPLEX) 1638 PetscErrorCode ierr; 1639 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1640 1641 PetscFunctionBegin; 1642 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1643 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1644 #else 1645 PetscFunctionBegin; 1646 #endif 1647 PetscFunctionReturn(0); 1648 } 1649 1650 /* -------------------------------------------------------------------*/ 1651 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1652 MatGetRow_MPIAIJ, 1653 MatRestoreRow_MPIAIJ, 1654 MatMult_MPIAIJ, 1655 /* 4*/ MatMultAdd_MPIAIJ, 1656 MatMultTranspose_MPIAIJ, 1657 MatMultTransposeAdd_MPIAIJ, 1658 0, 1659 0, 1660 0, 1661 /*10*/ 0, 1662 0, 1663 0, 1664 MatRelax_MPIAIJ, 1665 MatTranspose_MPIAIJ, 1666 /*15*/ MatGetInfo_MPIAIJ, 1667 MatEqual_MPIAIJ, 1668 MatGetDiagonal_MPIAIJ, 1669 MatDiagonalScale_MPIAIJ, 1670 MatNorm_MPIAIJ, 1671 /*20*/ MatAssemblyBegin_MPIAIJ, 1672 MatAssemblyEnd_MPIAIJ, 1673 0, 1674 MatSetOption_MPIAIJ, 1675 MatZeroEntries_MPIAIJ, 1676 /*25*/ MatZeroRows_MPIAIJ, 1677 0, 1678 0, 1679 0, 1680 0, 1681 /*30*/ MatSetUpPreallocation_MPIAIJ, 1682 0, 1683 0, 1684 0, 1685 0, 1686 /*35*/ MatDuplicate_MPIAIJ, 1687 0, 1688 0, 1689 0, 1690 
0, 1691 /*40*/ MatAXPY_MPIAIJ, 1692 MatGetSubMatrices_MPIAIJ, 1693 MatIncreaseOverlap_MPIAIJ, 1694 MatGetValues_MPIAIJ, 1695 MatCopy_MPIAIJ, 1696 /*45*/ MatPrintHelp_MPIAIJ, 1697 MatScale_MPIAIJ, 1698 0, 1699 0, 1700 0, 1701 /*50*/ MatSetBlockSize_MPIAIJ, 1702 0, 1703 0, 1704 0, 1705 0, 1706 /*55*/ MatFDColoringCreate_MPIAIJ, 1707 0, 1708 MatSetUnfactored_MPIAIJ, 1709 MatPermute_MPIAIJ, 1710 0, 1711 /*60*/ MatGetSubMatrix_MPIAIJ, 1712 MatDestroy_MPIAIJ, 1713 MatView_MPIAIJ, 1714 MatGetPetscMaps_Petsc, 1715 0, 1716 /*65*/ 0, 1717 0, 1718 0, 1719 0, 1720 0, 1721 /*70*/ 0, 1722 0, 1723 MatSetColoring_MPIAIJ, 1724 #if defined(PETSC_HAVE_ADIC) 1725 MatSetValuesAdic_MPIAIJ, 1726 #else 1727 0, 1728 #endif 1729 MatSetValuesAdifor_MPIAIJ, 1730 /*75*/ 0, 1731 0, 1732 0, 1733 0, 1734 0, 1735 /*80*/ 0, 1736 0, 1737 0, 1738 0, 1739 /*84*/ MatLoad_MPIAIJ, 1740 0, 1741 0, 1742 0, 1743 0, 1744 0, 1745 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1746 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1747 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1748 MatPtAP_Basic, 1749 MatPtAPSymbolic_MPIAIJ, 1750 /*95*/ MatPtAPNumeric_MPIAIJ, 1751 0, 1752 0, 1753 0, 1754 0, 1755 /*100*/0, 1756 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1757 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1758 MatConjugate_MPIAIJ 1759 }; 1760 1761 /* ----------------------------------------------------------------------------------------*/ 1762 1763 EXTERN_C_BEGIN 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1766 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1767 { 1768 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1773 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1774 PetscFunctionReturn(0); 1775 } 1776 EXTERN_C_END 1777 1778 EXTERN_C_BEGIN 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1781 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1782 { 1783 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1784 PetscErrorCode ierr; 1785 1786 PetscFunctionBegin; 1787 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1788 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 EXTERN_C_END 1792 1793 #include "petscpc.h" 1794 EXTERN_C_BEGIN 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1797 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1798 { 1799 Mat_MPIAIJ *b; 1800 PetscErrorCode ierr; 1801 PetscInt i; 1802 1803 PetscFunctionBegin; 1804 B->preallocated = PETSC_TRUE; 1805 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1806 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1807 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1808 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1809 if (d_nnz) { 1810 for (i=0; i<B->m; i++) { 1811 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1812 } 1813 } 1814 if (o_nnz) { 1815 for (i=0; i<B->m; i++) { 1816 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1817 } 1818 } 1819 b = (Mat_MPIAIJ*)B->data; 1820 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1821 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1822 1823 PetscFunctionReturn(0); 1824 } 1825 
EXTERN_C_END 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1830 { 1831 Mat mat; 1832 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1833 PetscErrorCode ierr; 1834 1835 PetscFunctionBegin; 1836 *newmat = 0; 1837 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1838 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1839 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1840 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1841 a = (Mat_MPIAIJ*)mat->data; 1842 1843 mat->factor = matin->factor; 1844 mat->bs = matin->bs; 1845 mat->assembled = PETSC_TRUE; 1846 mat->insertmode = NOT_SET_VALUES; 1847 mat->preallocated = PETSC_TRUE; 1848 1849 a->rstart = oldmat->rstart; 1850 a->rend = oldmat->rend; 1851 a->cstart = oldmat->cstart; 1852 a->cend = oldmat->cend; 1853 a->size = oldmat->size; 1854 a->rank = oldmat->rank; 1855 a->donotstash = oldmat->donotstash; 1856 a->roworiented = oldmat->roworiented; 1857 a->rowindices = 0; 1858 a->rowvalues = 0; 1859 a->getrowactive = PETSC_FALSE; 1860 1861 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1862 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1863 if (oldmat->colmap) { 1864 #if defined (PETSC_USE_CTABLE) 1865 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1866 #else 1867 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1868 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1869 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1870 #endif 1871 } else a->colmap = 0; 1872 if (oldmat->garray) { 1873 PetscInt len; 1874 len = oldmat->B->n; 1875 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1876 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1877 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1878 } else a->garray = 0; 1879 1880 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1881 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1882 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1883 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1884 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1885 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1886 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1887 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1888 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1889 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1890 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1891 *newmat = mat; 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #include "petscsys.h" 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatLoad_MPIAIJ" 1899 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1900 { 1901 Mat A; 1902 PetscScalar *vals,*svals; 1903 MPI_Comm comm = ((PetscObject)viewer)->comm; 1904 MPI_Status status; 1905 PetscErrorCode ierr; 1906 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1907 PetscInt i,nz,j,rstart,rend,mmax; 1908 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1909 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1910 PetscInt cend,cstart,n,*rowners; 1911 int fd; 1912 1913 PetscFunctionBegin; 1914 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1915 ierr = 
MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1916 if (!rank) { 1917 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1918 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1919 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1920 } 1921 1922 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1923 M = header[1]; N = header[2]; 1924 /* determine ownership of all rows */ 1925 m = M/size + ((M % size) > rank); 1926 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1927 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1928 1929 /* First process needs enough room for process with most rows */ 1930 if (!rank) { 1931 mmax = rowners[1]; 1932 for (i=2; i<size; i++) { 1933 mmax = PetscMax(mmax,rowners[i]); 1934 } 1935 } else mmax = m; 1936 1937 rowners[0] = 0; 1938 for (i=2; i<=size; i++) { 1939 mmax = PetscMax(mmax,rowners[i]); 1940 rowners[i] += rowners[i-1]; 1941 } 1942 rstart = rowners[rank]; 1943 rend = rowners[rank+1]; 1944 1945 /* distribute row lengths to all processors */ 1946 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1947 if (!rank) { 1948 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1949 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1950 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1951 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1952 for (j=0; j<m; j++) { 1953 procsnz[0] += ourlens[j]; 1954 } 1955 for (i=1; i<size; i++) { 1956 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1957 /* calculate the number of nonzeros on each processor */ 1958 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1959 procsnz[i] += rowlengths[j]; 1960 } 1961 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1962 } 1963 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1964 } else { 1965 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1966 } 1967 1968 if (!rank) { 1969 /* determine max buffer needed and allocate it */ 1970 maxnz = 0; 1971 for (i=0; i<size; i++) { 1972 maxnz = PetscMax(maxnz,procsnz[i]); 1973 } 1974 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1975 1976 /* read in my part of the matrix column indices */ 1977 nz = procsnz[0]; 1978 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1979 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1980 1981 /* read in every one elses and ship off */ 1982 for (i=1; i<size; i++) { 1983 nz = procsnz[i]; 1984 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1985 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1986 } 1987 ierr = PetscFree(cols);CHKERRQ(ierr); 1988 } else { 1989 /* determine buffer space needed for message */ 1990 nz = 0; 1991 for (i=0; i<m; i++) { 1992 nz += ourlens[i]; 1993 } 1994 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1995 1996 /* receive message of column indices*/ 1997 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1998 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1999 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2000 } 2001 2002 /* determine column ownership if matrix is not square */ 2003 if (N != M) { 2004 n = N/size + ((N % size) > rank); 2005 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2006 cstart = cend - n; 2007 } else { 
2008 cstart = rstart; 2009 cend = rend; 2010 n = cend - cstart; 2011 } 2012 2013 /* loop over local rows, determining number of off diagonal entries */ 2014 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2015 jj = 0; 2016 for (i=0; i<m; i++) { 2017 for (j=0; j<ourlens[i]; j++) { 2018 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2019 jj++; 2020 } 2021 } 2022 2023 /* create our matrix */ 2024 for (i=0; i<m; i++) { 2025 ourlens[i] -= offlens[i]; 2026 } 2027 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2028 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2029 ierr = MatSetType(A,type);CHKERRQ(ierr); 2030 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2031 2032 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2033 for (i=0; i<m; i++) { 2034 ourlens[i] += offlens[i]; 2035 } 2036 2037 if (!rank) { 2038 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2039 2040 /* read in my part of the matrix numerical values */ 2041 nz = procsnz[0]; 2042 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2043 2044 /* insert into matrix */ 2045 jj = rstart; 2046 smycols = mycols; 2047 svals = vals; 2048 for (i=0; i<m; i++) { 2049 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2050 smycols += ourlens[i]; 2051 svals += ourlens[i]; 2052 jj++; 2053 } 2054 2055 /* read in other processors and ship out */ 2056 for (i=1; i<size; i++) { 2057 nz = procsnz[i]; 2058 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2059 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2060 } 2061 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2062 } else { 2063 /* receive numeric values */ 2064 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2065 2066 /* receive message of values*/ 2067 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2068 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2069 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2070 2071 /* insert into matrix */ 2072 jj = rstart; 2073 smycols = mycols; 2074 svals = vals; 2075 for (i=0; i<m; i++) { 2076 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2077 smycols += ourlens[i]; 2078 svals += ourlens[i]; 2079 jj++; 2080 } 2081 } 2082 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2083 ierr = PetscFree(vals);CHKERRQ(ierr); 2084 ierr = PetscFree(mycols);CHKERRQ(ierr); 2085 ierr = PetscFree(rowners);CHKERRQ(ierr); 2086 2087 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 *newmat = A; 2090 PetscFunctionReturn(0); 2091 } 2092 2093 #undef __FUNCT__ 2094 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2095 /* 2096 Not great since it makes two copies of the submatrix, first an SeqAIJ 2097 in local and then by concatenating the local matrices the end result. 
2098 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2099 */ 2100 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2101 { 2102 PetscErrorCode ierr; 2103 PetscMPIInt rank,size; 2104 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2105 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2106 Mat *local,M,Mreuse; 2107 PetscScalar *vwork,*aa; 2108 MPI_Comm comm = mat->comm; 2109 Mat_SeqAIJ *aij; 2110 2111 2112 PetscFunctionBegin; 2113 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2114 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2115 2116 if (call == MAT_REUSE_MATRIX) { 2117 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2118 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2119 local = &Mreuse; 2120 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2121 } else { 2122 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2123 Mreuse = *local; 2124 ierr = PetscFree(local);CHKERRQ(ierr); 2125 } 2126 2127 /* 2128 m - number of local rows 2129 n - number of columns (same on all processors) 2130 rstart - first row in new global matrix generated 2131 */ 2132 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2133 if (call == MAT_INITIAL_MATRIX) { 2134 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2135 ii = aij->i; 2136 jj = aij->j; 2137 2138 /* 2139 Determine the number of non-zeros in the diagonal and off-diagonal 2140 portions of the matrix in order to do correct preallocation 2141 */ 2142 2143 /* first get start and end of "diagonal" columns */ 2144 if (csize == PETSC_DECIDE) { 2145 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2146 if (mglobal == n) { /* square matrix */ 2147 nlocal = m; 2148 } else { 2149 nlocal = n/size + ((n % size) > rank); 2150 } 2151 } else { 2152 nlocal = csize; 2153 } 2154 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2155 rstart = rend - nlocal; 2156 if (rank == size - 1 && rend != n) { 2157 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2158 } 2159 2160 /* next, compute all the lengths */ 2161 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2162 olens = dlens + m; 2163 for (i=0; i<m; i++) { 2164 jend = ii[i+1] - ii[i]; 2165 olen = 0; 2166 dlen = 0; 2167 for (j=0; j<jend; j++) { 2168 if (*jj < rstart || *jj >= rend) olen++; 2169 else dlen++; 2170 jj++; 2171 } 2172 olens[i] = olen; 2173 dlens[i] = dlen; 2174 } 2175 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2176 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2177 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2178 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2179 ierr = PetscFree(dlens);CHKERRQ(ierr); 2180 } else { 2181 PetscInt ml,nl; 2182 2183 M = *newmat; 2184 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2185 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2186 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2187 /* 2188 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2189 rather than the slower MatSetValues(). 
2190 */ 2191 M->was_assembled = PETSC_TRUE; 2192 M->assembled = PETSC_FALSE; 2193 } 2194 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2195 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2196 ii = aij->i; 2197 jj = aij->j; 2198 aa = aij->a; 2199 for (i=0; i<m; i++) { 2200 row = rstart + i; 2201 nz = ii[i+1] - ii[i]; 2202 cwork = jj; jj += nz; 2203 vwork = aa; aa += nz; 2204 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2205 } 2206 2207 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2208 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2209 *newmat = M; 2210 2211 /* save submatrix used in processor for next request */ 2212 if (call == MAT_INITIAL_MATRIX) { 2213 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2214 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2215 } 2216 2217 PetscFunctionReturn(0); 2218 } 2219 2220 EXTERN_C_BEGIN 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2223 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2224 { 2225 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2226 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2227 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2228 const PetscInt *JJ; 2229 PetscScalar *values; 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2234 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2235 o_nnz = d_nnz + m; 2236 2237 for (i=0; i<m; i++) { 2238 nnz = I[i+1]- I[i]; 2239 JJ = J + I[i]; 2240 nnz_max = PetscMax(nnz_max,nnz); 2241 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2242 for (j=0; j<nnz; j++) { 2243 if (*JJ >= cstart) break; 2244 JJ++; 2245 } 2246 d = 0; 2247 for (; j<nnz; j++) { 2248 if (*JJ++ >= cend) break; 2249 d++; 2250 } 2251 d_nnz[i] = d; 2252 o_nnz[i] = nnz - d; 2253 } 2254 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2255 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2256 2257 if (v) values = (PetscScalar*)v; 2258 else { 2259 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2260 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2261 } 2262 2263 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2264 for (i=0; i<m; i++) { 2265 ii = i + rstart; 2266 nnz = I[i+1]- I[i]; 2267 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2268 } 2269 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2270 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2271 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2272 2273 if (!v) { 2274 ierr = PetscFree(values);CHKERRQ(ierr); 2275 } 2276 PetscFunctionReturn(0); 2277 } 2278 EXTERN_C_END 2279 2280 #undef __FUNCT__ 2281 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2282 /*@C 2283 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2284 (the default parallel PETSc format). 2285 2286 Collective on MPI_Comm 2287 2288 Input Parameters: 2289 + B - the matrix 2290 . i - the indices into j for the start of each local row (starts with zero) 2291 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
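
   A minimal calling sketch for proc0 of the example above (illustrative only; the
   MatSetValues() loop and error checking are omitted):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(A,...) for the locally owned rows ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve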
2427 2428 Level: intermediate 2429 2430 .keywords: matrix, aij, compressed row, sparse, parallel 2431 2432 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2433 MPIAIJ 2434 @*/ 2435 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2436 { 2437 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2438 2439 PetscFunctionBegin; 2440 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2441 if (f) { 2442 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2443 } 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatCreateMPIAIJ" 2449 /*@C 2450 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2451 (the default parallel PETSc format). For good matrix assembly performance 2452 the user should preallocate the matrix storage by setting the parameters 2453 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2454 performance can be increased by more than a factor of 50. 2455 2456 Collective on MPI_Comm 2457 2458 Input Parameters: 2459 + comm - MPI communicator 2460 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2461 This value should be the same as the local size used in creating the 2462 y vector for the matrix-vector product y = Ax. 2463 . n - This value should be the same as the local size used in creating the 2464 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2465 calculated if N is given) For square matrices n is almost always m. 2466 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2467 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2468 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2469 (same value is used for all local rows) 2470 . d_nnz - array containing the number of nonzeros in the various rows of the 2471 DIAGONAL portion of the local submatrix (possibly different for each row) 2472 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2473 The size of this array is equal to the number of local rows, i.e 'm'. 2474 You must leave room for the diagonal entry even if it is zero. 2475 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2476 submatrix (same value is used for all local rows). 2477 - o_nnz - array containing the number of nonzeros in the various rows of the 2478 OFF-DIAGONAL portion of the local submatrix (possibly different for 2479 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2480 structure. The size of this array is equal to the number 2481 of local rows, i.e 'm'. 2482 2483 Output Parameter: 2484 . A - the matrix 2485 2486 Notes: 2487 If the *_nnz parameter is given then the *_nz parameter is ignored 2488 2489 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2490 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2491 storage requirements for this matrix. 2492 2493 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2494 processor than it must be used on all processors that share the object for 2495 that argument. 2496 2497 The user MUST specify either the local or global matrix dimensions 2498 (possibly both). 
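
   For example, a minimal sketch of a typical call, in which each process passes its
   local sizes and single per-row nonzero estimates (the values 5 and 2 below are
   purely illustrative):
.vb
     MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve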

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2577 In this case, the values of d_nz,o_nz are: 2578 .vb 2579 proc0 : dnz = 2, o_nz = 2 2580 proc1 : dnz = 3, o_nz = 2 2581 proc2 : dnz = 1, o_nz = 4 2582 .ve 2583 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2584 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2585 for proc3. i.e we are using 12+15+10=37 storage locations to store 2586 34 values. 2587 2588 When d_nnz, o_nnz parameters are specified, the storage is specified 2589 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2590 In the above case the values for d_nnz,o_nnz are: 2591 .vb 2592 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2593 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2594 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2595 .ve 2596 Here the space allocated is sum of all the above values i.e 34, and 2597 hence pre-allocation is perfect. 2598 2599 Level: intermediate 2600 2601 .keywords: matrix, aij, compressed row, sparse, parallel 2602 2603 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2604 MPIAIJ 2605 @*/ 2606 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2607 { 2608 PetscErrorCode ierr; 2609 PetscMPIInt size; 2610 2611 PetscFunctionBegin; 2612 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2613 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2614 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2615 if (size > 1) { 2616 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2617 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2618 } else { 2619 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2620 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2621 } 2622 PetscFunctionReturn(0); 2623 } 2624 2625 #undef __FUNCT__ 2626 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2627 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2628 { 2629 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2630 2631 PetscFunctionBegin; 2632 *Ad = a->A; 2633 *Ao = a->B; 2634 *colmap = a->garray; 2635 PetscFunctionReturn(0); 2636 } 2637 2638 #undef __FUNCT__ 2639 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2640 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2641 { 2642 PetscErrorCode ierr; 2643 PetscInt i; 2644 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2645 2646 PetscFunctionBegin; 2647 if (coloring->ctype == IS_COLORING_LOCAL) { 2648 ISColoringValue *allcolors,*colors; 2649 ISColoring ocoloring; 2650 2651 /* set coloring for diagonal portion */ 2652 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2653 2654 /* set coloring for off-diagonal portion */ 2655 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2656 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2657 for (i=0; i<a->B->n; i++) { 2658 colors[i] = allcolors[a->garray[i]]; 2659 } 2660 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2661 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2662 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2663 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2664 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2665 ISColoringValue *colors; 2666 PetscInt *larray; 2667 ISColoring ocoloring; 2668 2669 /* set coloring for diagonal portion */ 2670 ierr = 
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large PETSc matrix by concatenating the sequential
      matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator that the parallel matrix will live on
.  inmat - the input sequential matrix (one per process)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
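
   A minimal calling sketch (A_seq and A_mpi are illustrative names; A_seq is the
   SeqAIJ matrix each process has already assembled):
.vb
     MatMerge(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);
.ve
   Note that inmat is destroyed by this routine after its values have been copied
   into outmat.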
2750 2751 @*/ 2752 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2753 { 2754 PetscErrorCode ierr; 2755 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2756 PetscInt *indx; 2757 PetscScalar *values; 2758 PetscMap columnmap,rowmap; 2759 2760 PetscFunctionBegin; 2761 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2762 /* 2763 PetscMPIInt rank; 2764 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2765 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2766 */ 2767 if (scall == MAT_INITIAL_MATRIX){ 2768 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2769 if (n == PETSC_DECIDE){ 2770 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2771 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2772 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2773 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2774 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2775 } 2776 2777 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2778 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2779 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2780 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2781 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2782 2783 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2784 for (i=0;i<m;i++) { 2785 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2786 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2787 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2788 } 2789 /* This routine will ONLY return MPIAIJ type matrix */ 2790 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2791 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2792 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2793 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2794 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2795 2796 } else if (scall == MAT_REUSE_MATRIX){ 2797 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2798 } else { 2799 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2800 } 2801 2802 for (i=0;i<m;i++) { 2803 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2804 I = i + rstart; 2805 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2806 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2807 } 2808 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2809 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 2812 PetscFunctionReturn(0); 2813 } 2814 2815 #undef __FUNCT__ 2816 #define __FUNCT__ "MatFileSplit" 2817 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2818 { 2819 PetscErrorCode ierr; 2820 PetscMPIInt rank; 2821 PetscInt m,N,i,rstart,nnz; 2822 size_t len; 2823 const PetscInt *indx; 2824 PetscViewer out; 2825 char *name; 2826 Mat B; 2827 const PetscScalar *values; 2828 2829 PetscFunctionBegin; 2830 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2831 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2832 /* Should this be the type of the diagonal block of A? 
*/ 2833 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2834 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2835 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2836 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2837 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2838 for (i=0;i<m;i++) { 2839 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2840 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2841 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2842 } 2843 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2845 2846 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2847 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2848 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2849 sprintf(name,"%s.%d",outfile,rank); 2850 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2851 ierr = PetscFree(name); 2852 ierr = MatView(B,out);CHKERRQ(ierr); 2853 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2854 ierr = MatDestroy(B);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2861 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2862 { 2863 PetscErrorCode ierr; 2864 Mat_Merge_SeqsToMPI *merge; 2865 PetscObjectContainer container; 2866 2867 PetscFunctionBegin; 2868 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2869 if (container) { 2870 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2871 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2872 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2873 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2874 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2875 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2876 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2877 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2878 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2879 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2880 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2881 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2882 2883 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2884 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2885 } 2886 ierr = PetscFree(merge);CHKERRQ(ierr); 2887 2888 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2889 PetscFunctionReturn(0); 2890 } 2891 2892 #include "src/mat/utils/freespace.h" 2893 #include "petscbt.h" 2894 static PetscEvent logkey_seqstompinum = 0; 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2897 /*@C 2898 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2899 matrices from each processor 2900 2901 Collective on MPI_Comm 2902 2903 Input Parameters: 2904 + comm - the communicators the parallel matrix will live on 2905 . seqmat - the input sequential matrices 2906 . m - number of local rows (or PETSC_DECIDE) 2907 . n - number of local columns (or PETSC_DECIDE) 2908 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2909 2910 Output Parameter: 2911 . mpimat - the parallel matrix generated 2912 2913 Level: advanced 2914 2915 Notes: 2916 The dimensions of the sequential matrix in each processor MUST be the same. 
2917 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2918 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2919 @*/ 2920 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2921 { 2922 PetscErrorCode ierr; 2923 MPI_Comm comm=mpimat->comm; 2924 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2925 PetscMPIInt size,rank,taga,*len_s; 2926 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2927 PetscInt proc,m; 2928 PetscInt **buf_ri,**buf_rj; 2929 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2930 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2931 MPI_Request *s_waits,*r_waits; 2932 MPI_Status *status; 2933 MatScalar *aa=a->a,**abuf_r,*ba_i; 2934 Mat_Merge_SeqsToMPI *merge; 2935 PetscObjectContainer container; 2936 2937 PetscFunctionBegin; 2938 if (!logkey_seqstompinum) { 2939 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2940 } 2941 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2942 2943 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2944 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2945 2946 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2947 if (container) { 2948 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2949 } 2950 bi = merge->bi; 2951 bj = merge->bj; 2952 buf_ri = merge->buf_ri; 2953 buf_rj = merge->buf_rj; 2954 2955 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2956 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2957 len_s = merge->len_s; 2958 2959 /* send and recv matrix values */ 2960 /*-----------------------------*/ 2961 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2962 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2963 2964 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2965 for (proc=0,k=0; proc<size; proc++){ 2966 if (!len_s[proc]) continue; 2967 i = owners[proc]; 2968 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2969 k++; 2970 } 2971 2972 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 2973 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 2974 ierr = PetscFree(status);CHKERRQ(ierr); 2975 2976 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2977 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2978 2979 /* insert mat values of mpimat */ 2980 /*----------------------------*/ 2981 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2982 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2983 nextrow = buf_ri_k + merge->nrecv; 2984 nextai = nextrow + merge->nrecv; 2985 2986 for (k=0; k<merge->nrecv; k++){ 2987 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2988 nrows = *(buf_ri_k[k]); 2989 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2990 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2991 } 2992 2993 /* set values of ba */ 2994 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2995 for (i=0; i<m; i++) { 2996 arow = owners[rank] + i; 2997 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2998 bnzi = bi[i+1] - bi[i]; 2999 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 
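    /* ba_i is a zeroed buffer for the bnzi nonzeros of global row arow of mpimat;
       the local contribution from seqmat and then the contributions received from
       other processes are accumulated into it before a single MatSetValues() call. */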
3000 3001 /* add local non-zero vals of this proc's seqmat into ba */ 3002 anzi = ai[arow+1] - ai[arow]; 3003 aj = a->j + ai[arow]; 3004 aa = a->a + ai[arow]; 3005 nextaj = 0; 3006 for (j=0; nextaj<anzi; j++){ 3007 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3008 ba_i[j] += aa[nextaj++]; 3009 } 3010 } 3011 3012 /* add received vals into ba */ 3013 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3014 /* i-th row */ 3015 if (i == *nextrow[k]) { 3016 anzi = *(nextai[k]+1) - *nextai[k]; 3017 aj = buf_rj[k] + *(nextai[k]); 3018 aa = abuf_r[k] + *(nextai[k]); 3019 nextaj = 0; 3020 for (j=0; nextaj<anzi; j++){ 3021 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3022 ba_i[j] += aa[nextaj++]; 3023 } 3024 } 3025 nextrow[k]++; nextai[k]++; 3026 } 3027 } 3028 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3029 } 3030 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3031 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 3033 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3034 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3035 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3036 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 static PetscEvent logkey_seqstompisym = 0; 3041 #undef __FUNCT__ 3042 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3043 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3044 { 3045 PetscErrorCode ierr; 3046 Mat B_mpi; 3047 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3048 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3049 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3050 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3051 PetscInt len,proc,*dnz,*onz; 3052 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3053 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3054 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3055 MPI_Status *status; 3056 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3057 PetscBT lnkbt; 3058 Mat_Merge_SeqsToMPI *merge; 3059 PetscObjectContainer container; 3060 3061 PetscFunctionBegin; 3062 if (!logkey_seqstompisym) { 3063 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3064 } 3065 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3066 3067 /* make sure it is a PETSc comm */ 3068 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3070 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3071 3072 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3073 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3074 3075 /* determine row ownership */ 3076 /*---------------------------------------------------------*/ 3077 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3078 if (m == PETSC_DECIDE) { 3079 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3080 } else { 3081 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3082 } 3083 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3084 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3085 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3086 3087 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3088 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3089 3090 /* determine the 
number of messages to send, their lengths */ 3091 /*---------------------------------------------------------*/ 3092 len_s = merge->len_s; 3093 3094 len = 0; /* length of buf_si[] */ 3095 merge->nsend = 0; 3096 for (proc=0; proc<size; proc++){ 3097 len_si[proc] = 0; 3098 if (proc == rank){ 3099 len_s[proc] = 0; 3100 } else { 3101 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3102 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3103 } 3104 if (len_s[proc]) { 3105 merge->nsend++; 3106 nrows = 0; 3107 for (i=owners[proc]; i<owners[proc+1]; i++){ 3108 if (ai[i+1] > ai[i]) nrows++; 3109 } 3110 len_si[proc] = 2*(nrows+1); 3111 len += len_si[proc]; 3112 } 3113 } 3114 3115 /* determine the number and length of messages to receive for ij-structure */ 3116 /*-------------------------------------------------------------------------*/ 3117 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3118 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3119 3120 /* post the Irecv of j-structure */ 3121 /*-------------------------------*/ 3122 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3123 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3124 3125 /* post the Isend of j-structure */ 3126 /*--------------------------------*/ 3127 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3128 sj_waits = si_waits + merge->nsend; 3129 3130 for (proc=0, k=0; proc<size; proc++){ 3131 if (!len_s[proc]) continue; 3132 i = owners[proc]; 3133 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3134 k++; 3135 } 3136 3137 /* receives and sends of j-structure are complete */ 3138 /*------------------------------------------------*/ 3139 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3140 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3141 3142 /* send and recv i-structure */ 3143 /*---------------------------*/ 3144 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3145 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3146 3147 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3148 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3149 for (proc=0,k=0; proc<size; proc++){ 3150 if (!len_s[proc]) continue; 3151 /* form outgoing message for i-structure: 3152 buf_si[0]: nrows to be sent 3153 [1:nrows]: row index (global) 3154 [nrows+1:2*nrows+1]: i-structure index 3155 */ 3156 /*-------------------------------------------*/ 3157 nrows = len_si[proc]/2 - 1; 3158 buf_si_i = buf_si + nrows+1; 3159 buf_si[0] = nrows; 3160 buf_si_i[0] = 0; 3161 nrows = 0; 3162 for (i=owners[proc]; i<owners[proc+1]; i++){ 3163 anzi = ai[i+1] - ai[i]; 3164 if (anzi) { 3165 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3166 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3167 nrows++; 3168 } 3169 } 3170 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3171 k++; 3172 buf_si += len_si[proc]; 3173 } 3174 3175 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3176 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3177 3178 ierr = 
PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3179 for (i=0; i<merge->nrecv; i++){ 3180 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3181 } 3182 3183 ierr = PetscFree(len_si);CHKERRQ(ierr); 3184 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3185 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3186 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3187 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3188 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3189 ierr = PetscFree(status);CHKERRQ(ierr); 3190 3191 /* compute a local seq matrix in each processor */ 3192 /*----------------------------------------------*/ 3193 /* allocate bi array and free space for accumulating nonzero column info */ 3194 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3195 bi[0] = 0; 3196 3197 /* create and initialize a linked list */ 3198 nlnk = N+1; 3199 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3200 3201 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3202 len = 0; 3203 len = ai[owners[rank+1]] - ai[owners[rank]]; 3204 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3205 current_space = free_space; 3206 3207 /* determine symbolic info for each local row */ 3208 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3209 nextrow = buf_ri_k + merge->nrecv; 3210 nextai = nextrow + merge->nrecv; 3211 for (k=0; k<merge->nrecv; k++){ 3212 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3213 nrows = *buf_ri_k[k]; 3214 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3215 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3216 } 3217 3218 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3219 len = 0; 3220 for (i=0;i<m;i++) { 3221 bnzi = 0; 3222 /* add local non-zero cols of this proc's seqmat into lnk */ 3223 arow = owners[rank] + i; 3224 anzi = ai[arow+1] - ai[arow]; 3225 aj = a->j + ai[arow]; 3226 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3227 bnzi += nlnk; 3228 /* add received col data into lnk */ 3229 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3230 if (i == *nextrow[k]) { /* i-th row */ 3231 anzi = *(nextai[k]+1) - *nextai[k]; 3232 aj = buf_rj[k] + *nextai[k]; 3233 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3234 bnzi += nlnk; 3235 nextrow[k]++; nextai[k]++; 3236 } 3237 } 3238 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3239 3240 /* if free space is not available, make more free space */ 3241 if (current_space->local_remaining<bnzi) { 3242 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3243 nspacedouble++; 3244 } 3245 /* copy data into free space, then initialize lnk */ 3246 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3247 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3248 3249 current_space->array += bnzi; 3250 current_space->local_used += bnzi; 3251 current_space->local_remaining -= bnzi; 3252 3253 bi[i+1] = bi[i] + bnzi; 3254 } 3255 3256 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3257 3258 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3259 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3260 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3261 3262 /* create symbolic parallel 
matrix B_mpi */ 3263 /*---------------------------------------*/ 3264 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3265 if (n==PETSC_DECIDE) { 3266 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3267 } else { 3268 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3269 } 3270 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3271 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3272 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3273 3274 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3275 B_mpi->assembled = PETSC_FALSE; 3276 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3277 merge->bi = bi; 3278 merge->bj = bj; 3279 merge->buf_ri = buf_ri; 3280 merge->buf_rj = buf_rj; 3281 merge->coi = PETSC_NULL; 3282 merge->coj = PETSC_NULL; 3283 merge->owners_co = PETSC_NULL; 3284 3285 /* attach the supporting struct to B_mpi for reuse */ 3286 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3287 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3288 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3289 *mpimat = B_mpi; 3290 3291 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3292 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3293 PetscFunctionReturn(0); 3294 } 3295 3296 static PetscEvent logkey_seqstompi = 0; 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatMerge_SeqsToMPI" 3299 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3300 { 3301 PetscErrorCode ierr; 3302 3303 PetscFunctionBegin; 3304 if (!logkey_seqstompi) { 3305 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3306 } 3307 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3308 if (scall == MAT_INITIAL_MATRIX){ 3309 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3310 } 3311 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3312 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3313 PetscFunctionReturn(0); 3314 } 3315 static PetscEvent logkey_getlocalmat = 0; 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatGetLocalMat" 3318 /*@C 3319 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3320 3321 Not Collective 3322 3323 Input Parameters: 3324 + A - the matrix 3325 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3326 3327 Output Parameter: 3328 . 
A_loc - the local sequential matrix generated 3329 3330 Level: developer 3331 3332 @*/ 3333 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3334 { 3335 PetscErrorCode ierr; 3336 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3337 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3338 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3339 PetscScalar *aa=a->a,*ba=b->a,*ca; 3340 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3341 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3342 3343 PetscFunctionBegin; 3344 if (!logkey_getlocalmat) { 3345 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3346 } 3347 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3348 if (scall == MAT_INITIAL_MATRIX){ 3349 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3350 ci[0] = 0; 3351 for (i=0; i<am; i++){ 3352 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3353 } 3354 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3355 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3356 k = 0; 3357 for (i=0; i<am; i++) { 3358 ncols_o = bi[i+1] - bi[i]; 3359 ncols_d = ai[i+1] - ai[i]; 3360 /* off-diagonal portion of A */ 3361 for (jo=0; jo<ncols_o; jo++) { 3362 col = cmap[*bj]; 3363 if (col >= cstart) break; 3364 cj[k] = col; bj++; 3365 ca[k++] = *ba++; 3366 } 3367 /* diagonal portion of A */ 3368 for (j=0; j<ncols_d; j++) { 3369 cj[k] = cstart + *aj++; 3370 ca[k++] = *aa++; 3371 } 3372 /* off-diagonal portion of A */ 3373 for (j=jo; j<ncols_o; j++) { 3374 cj[k] = cmap[*bj++]; 3375 ca[k++] = *ba++; 3376 } 3377 } 3378 /* put together the new matrix */ 3379 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3380 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3381 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3382 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3383 mat->freedata = PETSC_TRUE; 3384 mat->nonew = 0; 3385 } else if (scall == MAT_REUSE_MATRIX){ 3386 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3387 ci = mat->i; cj = mat->j; ca = mat->a; 3388 for (i=0; i<am; i++) { 3389 /* off-diagonal portion of A */ 3390 ncols_o = bi[i+1] - bi[i]; 3391 for (jo=0; jo<ncols_o; jo++) { 3392 col = cmap[*bj]; 3393 if (col >= cstart) break; 3394 *ca++ = *ba++; bj++; 3395 } 3396 /* diagonal portion of A */ 3397 ncols_d = ai[i+1] - ai[i]; 3398 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3399 /* off-diagonal portion of A */ 3400 for (j=jo; j<ncols_o; j++) { 3401 *ca++ = *ba++; bj++; 3402 } 3403 } 3404 } else { 3405 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3406 } 3407 3408 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3409 PetscFunctionReturn(0); 3410 } 3411 3412 static PetscEvent logkey_getlocalmatcondensed = 0; 3413 #undef __FUNCT__ 3414 #define __FUNCT__ "MatGetLocalMatCondensed" 3415 /*@C 3416 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3417 3418 Not Collective 3419 3420 Input Parameters: 3421 + A - the matrix 3422 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3423 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3424 3425 Output Parameter: 3426 . 
A_loc - the local sequential matrix generated 3427 3428 Level: developer 3429 3430 @*/ 3431 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3432 { 3433 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3434 PetscErrorCode ierr; 3435 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3436 IS isrowa,iscola; 3437 Mat *aloc; 3438 3439 PetscFunctionBegin; 3440 if (!logkey_getlocalmatcondensed) { 3441 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3442 } 3443 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3444 if (!row){ 3445 start = a->rstart; end = a->rend; 3446 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3447 } else { 3448 isrowa = *row; 3449 } 3450 if (!col){ 3451 start = a->cstart; 3452 cmap = a->garray; 3453 nzA = a->A->n; 3454 nzB = a->B->n; 3455 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3456 ncols = 0; 3457 for (i=0; i<nzB; i++) { 3458 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3459 else break; 3460 } 3461 imark = i; 3462 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3463 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3464 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3465 ierr = PetscFree(idx);CHKERRQ(ierr); 3466 } else { 3467 iscola = *col; 3468 } 3469 if (scall != MAT_INITIAL_MATRIX){ 3470 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3471 aloc[0] = *A_loc; 3472 } 3473 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3474 *A_loc = aloc[0]; 3475 ierr = PetscFree(aloc);CHKERRQ(ierr); 3476 if (!row){ 3477 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3478 } 3479 if (!col){ 3480 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3481 } 3482 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3483 PetscFunctionReturn(0); 3484 } 3485 3486 static PetscEvent logkey_GetBrowsOfAcols = 0; 3487 #undef __FUNCT__ 3488 #define __FUNCT__ "MatGetBrowsOfAcols" 3489 /*@C 3490 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3491 3492 Collective on Mat 3493 3494 Input Parameters: 3495 + A,B - the matrices in mpiaij format 3496 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3497 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3498 3499 Output Parameter: 3500 + rowb, colb - index sets of rows and columns of B to extract 3501 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3502 - B_seq - the sequential matrix generated 3503 3504 Level: developer 3505 3506 @*/ 3507 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3508 { 3509 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3510 PetscErrorCode ierr; 3511 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3512 IS isrowb,iscolb; 3513 Mat *bseq; 3514 3515 PetscFunctionBegin; 3516 if (a->cstart != b->rstart || a->cend != b->rend){ 3517 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3518 } 3519 if (!logkey_GetBrowsOfAcols) { 3520 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3521 } 3522 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3523 3524 if (scall == MAT_INITIAL_MATRIX){ 3525 start = a->cstart; 3526 cmap = a->garray; 3527 nzA = a->A->n; 3528 nzB = a->B->n; 3529 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3530 ncols = 0; 3531 for (i=0; i<nzB; i++) { /* row < local row index */ 3532 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3533 else break; 3534 } 3535 imark = i; 3536 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3537 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3538 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3539 ierr = PetscFree(idx);CHKERRQ(ierr); 3540 *brstart = imark; 3541 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3542 } else { 3543 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3544 isrowb = *rowb; iscolb = *colb; 3545 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3546 bseq[0] = *B_seq; 3547 } 3548 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3549 *B_seq = bseq[0]; 3550 ierr = PetscFree(bseq);CHKERRQ(ierr); 3551 if (!rowb){ 3552 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3553 } else { 3554 *rowb = isrowb; 3555 } 3556 if (!colb){ 3557 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3558 } else { 3559 *colb = iscolb; 3560 } 3561 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 static PetscEvent logkey_GetBrowsOfAocols = 0; 3566 #undef __FUNCT__ 3567 #define __FUNCT__ "MatGetBrowsOfAoCols" 3568 /*@C 3569 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3570 of the OFF-DIAGONAL portion of local A 3571 3572 Collective on Mat 3573 3574 Input Parameters: 3575 + A,B - the matrices in mpiaij format 3576 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3577 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3578 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3579 3580 Output Parameter: 3581 + B_oth - the sequential matrix generated 3582 3583 Level: developer 3584 3585 @*/ 3586 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3587 { 3588 VecScatter_MPI_General *gen_to,*gen_from; 3589 PetscErrorCode ierr; 3590 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3591 Mat_SeqAIJ *b_oth; 3592 VecScatter ctx=a->Mvctx; 3593 MPI_Comm comm=ctx->comm; 3594 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3595 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3596 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3597 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3598 MPI_Request *rwaits,*swaits; 3599 MPI_Status *sstatus,rstatus; 3600 PetscInt *cols; 3601 PetscScalar *vals; 3602 PetscMPIInt j; 3603 3604 PetscFunctionBegin; 3605 if (a->cstart != b->rstart || a->cend != b->rend){ 3606 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3607 } 3608 if (!logkey_GetBrowsOfAocols) { 3609 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3610 } 3611 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3612 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3613 3614 gen_to = (VecScatter_MPI_General*)ctx->todata; 3615 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3616 rvalues = gen_from->values; /* holds the length of sending row */ 3617 svalues = gen_to->values; /* holds the length of receiving row */ 3618 nrecvs = gen_from->n; 3619 nsends = gen_to->n; 3620 rwaits = gen_from->requests; 3621 swaits = gen_to->requests; 3622 srow = gen_to->indices; /* local row index to be sent */ 3623 rstarts = gen_from->starts; 3624 sstarts = gen_to->starts; 3625 rprocs = gen_from->procs; 3626 sprocs = gen_to->procs; 3627 sstatus = gen_to->sstatus; 3628 3629 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3630 if (scall == MAT_INITIAL_MATRIX){ 3631 /* i-array */ 3632 /*---------*/ 3633 /* post receives */ 3634 for (i=0; i<nrecvs; i++){ 3635 rowlen = (PetscInt*)rvalues + rstarts[i]; 3636 nrows = rstarts[i+1]-rstarts[i]; 3637 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3638 } 3639 3640 /* pack the outgoing message */ 3641 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3642 rstartsj = sstartsj + nsends +1; 3643 sstartsj[0] = 0; rstartsj[0] = 0; 3644 len = 0; /* total length of j or a array to be sent */ 3645 k = 0; 3646 for (i=0; i<nsends; i++){ 3647 rowlen = (PetscInt*)svalues + sstarts[i]; 3648 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3649 for (j=0; j<nrows; j++) { 3650 row = srow[k] + b->rowners[rank]; /* global row idx */ 3651 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3652 len += rowlen[j]; 3653 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3654 k++; 3655 } 3656 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3657 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3658 } 3659 /* recvs and sends of i-array are completed */ 3660 i = nrecvs; 3661 while (i--) { 3662 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3663 } 3664 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3665 /* allocate buffers for sending j and a arrays */ 3666 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3667 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3668 3669 /* create i-array of B_oth */ 3670 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3671 b_othi[0] = 0; 3672 len = 0; /* total length of j or a array to be received */ 3673 k = 0; 3674 for (i=0; i<nrecvs; i++){ 3675 rowlen = (PetscInt*)rvalues + rstarts[i]; 3676 nrows = rstarts[i+1]-rstarts[i]; 3677 for (j=0; j<nrows; j++) { 3678 b_othi[k+1] = b_othi[k] + rowlen[j]; 3679 len += rowlen[j]; k++; 3680 } 3681 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3682 } 3683 3684 /* allocate space for j and a arrays of B_oth */ 3685 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3686 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3687 3688 /* j-array */ 3689 /*---------*/ 3690 /* post receives of j-array */ 3691 for (i=0; i<nrecvs; i++){ 3692 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3693 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3694 } 3695 k = 0; 3696 for (i=0; i<nsends; i++){ 3697 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3698 bufJ = bufj+sstartsj[i]; 3699 for (j=0; j<nrows; j++) { 3700 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3701 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3702 for (l=0; l<ncols; l++){ 3703 *bufJ++ = cols[l]; 3704 } 3705 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3706 } 3707 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3708 } 3709 3710 /* recvs and sends of j-array are completed */ 3711 i = nrecvs; 3712 while (i--) { 3713 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3714 } 3715 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3716 } else if (scall == MAT_REUSE_MATRIX){ 3717 sstartsj = *startsj; 3718 rstartsj = sstartsj + nsends +1; 3719 bufa = *bufa_ptr; 3720 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3721 b_otha = b_oth->a; 3722 } else { 3723 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 3724 } 3725 3726 /* a-array */ 3727 /*---------*/ 3728 /* post receives of a-array */ 3729 for (i=0; i<nrecvs; i++){ 3730 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3731 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3732 } 3733 k = 0; 3734 for (i=0; i<nsends; i++){ 3735 nrows = sstarts[i+1]-sstarts[i]; 3736 bufA = bufa+sstartsj[i]; 3737 for (j=0; j<nrows; j++) { 3738 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3739 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3740 for (l=0; l<ncols; l++){ 3741 *bufA++ = vals[l]; 3742 } 3743 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3744 3745 } 3746 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3747 } 3748 /* recvs and sends of a-array are completed */ 3749 i = nrecvs; 3750 while (i--) { 3751 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3752 } 3753 if (nsends) {ierr = 
MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3754 3755 if (scall == MAT_INITIAL_MATRIX){ 3756 /* put together the new matrix */ 3757 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3758 3759 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3760 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3761 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3762 b_oth->freedata = PETSC_TRUE; 3763 b_oth->nonew = 0; 3764 3765 ierr = PetscFree(bufj);CHKERRQ(ierr); 3766 if (!startsj || !bufa_ptr){ 3767 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3768 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3769 } else { 3770 *startsj = sstartsj; 3771 *bufa_ptr = bufa; 3772 } 3773 } 3774 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3775 3776 PetscFunctionReturn(0); 3777 } 3778 3779 #undef __FUNCT__ 3780 #define __FUNCT__ "MatGetCommunicationStructs" 3781 /*@C 3782 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3783 3784 Not Collective 3785 3786 Input Parameters: 3787 . A - The matrix in mpiaij format 3788 3789 Output Parameter: 3790 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3791 . colmap - A map from global column index to local index into lvec 3792 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3793 3794 Level: developer 3795 3796 @*/ 3797 #if defined (PETSC_USE_CTABLE) 3798 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3799 #else 3800 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3801 #endif 3802 { 3803 Mat_MPIAIJ *a; 3804 3805 PetscFunctionBegin; 3806 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3807 PetscValidPointer(lvec, 2) 3808 PetscValidPointer(colmap, 3) 3809 PetscValidPointer(multScatter, 4) 3810 a = (Mat_MPIAIJ *) A->data; 3811 if (lvec) *lvec = a->lvec; 3812 if (colmap) *colmap = a->colmap; 3813 if (multScatter) *multScatter = a->Mvctx; 3814 PetscFunctionReturn(0); 3815 } 3816 3817 /*MC 3818 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3819 3820 Options Database Keys: 3821 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3822 3823 Level: beginner 3824 3825 .seealso: MatCreateMPIAIJ 3826 M*/ 3827 3828 EXTERN_C_BEGIN 3829 #undef __FUNCT__ 3830 #define __FUNCT__ "MatCreate_MPIAIJ" 3831 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3832 { 3833 Mat_MPIAIJ *b; 3834 PetscErrorCode ierr; 3835 PetscInt i; 3836 PetscMPIInt size; 3837 3838 PetscFunctionBegin; 3839 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3840 3841 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3842 B->data = (void*)b; 3843 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3844 B->factor = 0; 3845 B->bs = 1; 3846 B->assembled = PETSC_FALSE; 3847 B->mapping = 0; 3848 3849 B->insertmode = NOT_SET_VALUES; 3850 b->size = size; 3851 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3852 3853 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3854 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3855 3856 /* the information in the maps duplicates the information computed below, eventually 3857 we should remove the duplicate information that is not contained in the maps */ 3858 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3859 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3860 3861 /* build local table of row and column ownerships */ 3862 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3863 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3864 b->cowners = b->rowners + b->size + 2; 3865 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3866 b->rowners[0] = 0; 3867 for (i=2; i<=b->size; i++) { 3868 b->rowners[i] += b->rowners[i-1]; 3869 } 3870 b->rstart = b->rowners[b->rank]; 3871 b->rend = b->rowners[b->rank+1]; 3872 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3873 b->cowners[0] = 0; 3874 for (i=2; i<=b->size; i++) { 3875 b->cowners[i] += b->cowners[i-1]; 3876 } 3877 b->cstart = b->cowners[b->rank]; 3878 b->cend = b->cowners[b->rank+1]; 3879 3880 /* build cache for off array entries formed */ 3881 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3882 b->donotstash = PETSC_FALSE; 3883 b->colmap = 0; 3884 b->garray = 0; 3885 b->roworiented = PETSC_TRUE; 3886 3887 /* stuff used for matrix vector multiply */ 3888 b->lvec = PETSC_NULL; 3889 b->Mvctx = PETSC_NULL; 3890 3891 /* stuff for MatGetRow() */ 3892 b->rowindices = 0; 3893 b->rowvalues = 0; 3894 b->getrowactive = PETSC_FALSE; 3895 3896 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3897 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3898 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 3899 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3900 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3901 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3902 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 3903 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3904 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3905 3906 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3907 "MatStoreValues_MPIAIJ", 3908 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3909 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3910 "MatRetrieveValues_MPIAIJ", 3911 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3912 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3913 "MatGetDiagonalBlock_MPIAIJ", 3914 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3915 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3916 "MatIsTranspose_MPIAIJ", 3917 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3918 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3919 "MatMPIAIJSetPreallocation_MPIAIJ", 3920 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3921 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3922 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3923 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3924 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3925 "MatDiagonalScaleLocal_MPIAIJ", 3926 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3927 PetscFunctionReturn(0); 3928 } 3929 EXTERN_C_END 3930
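/*
   Usage sketch (editorial addition, not part of the library source): a minimal,
   hypothetical example of how an application creates and fills a MATMPIAIJ matrix,
   which exercises MatCreate_MPIAIJ(), MatMPIAIJSetPreallocation_MPIAIJ() and
   MatSetValues_MPIAIJ() above.  The global size of 100 and the preallocation
   estimates (3 diagonal and 2 off-diagonal nonzeros per row) are illustrative
   assumptions only.

     Mat            A;
     PetscInt       i,Istart,Iend,cols[2];
     PetscScalar    vals[2] = {2.0,-1.0};
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
     for (i=Istart; i<Iend; i++) {
       cols[0] = i; cols[1] = (i+1) % 100;   // one diagonal and (usually) one off-diagonal entry per row
       ierr = MatSetValues(A,1,&i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     ierr = MatDestroy(A);CHKERRQ(ierr);
*/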