#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
  if (nonew == 1) goto a_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
  N = nrow1++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
  if (nonew == 1) goto b_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
  N = nrow2++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
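/*
   The two macros above hand-inline the insertion logic of the sequential AIJ format for
   the diagonal (A) and off-diagonal (B) blocks: a short bisection over the sorted column
   indices of the row, an in-place update if the entry already exists, and otherwise an
   insertion that shifts the tail of the row (reallocating with MatSeqXAIJReallocateAIJ()
   if the row is full).  As an uncompiled, illustrative sketch only (the names rp, ap and
   nrow are assumptions mirroring the macro locals rp1/ap1/nrow1), the per-entry logic is

      while (high-low > 5) {t = (low+high)/2; if (rp[t] > col) high = t; else low = t;}
      for (i=low; i<high; i++) {
        if (rp[i] == col) {ap[i] = (addv == ADD_VALUES) ? ap[i]+value : value; goto done;}
        if (rp[i] > col) break;
      }
      ... grow the row if needed, shift rp[i..nrow-1] and ap[i..nrow-1] up by one, insert ...

   The lastcol/low/high state is carried between invocations so that callers which pass
   columns in increasing order do not restart the search at the beginning of each row.
*/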
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->m;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
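/*
   Illustrative use of MatSetValues() on this matrix type (sketch only; the sizes, rows and
   values below are made up).  Any process may set any global entry; entries owned by another
   process are stashed by the routine above and exchanged during assembly:

      Mat         C;
      PetscInt    row = 0,cols[2] = {0,5};
      PetscScalar vals[2] = {2.0,-1.0};

      MatCreate(PETSC_COMM_WORLD,&C);
      MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,10,10);
      MatSetType(C,MATMPIAIJ);
      MatMPIAIJSetPreallocation(C,2,PETSC_NULL,1,PETSC_NULL);
      MatSetValues(C,1,&row,2,cols,vals,INSERT_VALUES);
      MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
      MatDestroy(C);
*/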
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ*)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

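  /*
     nprocs[2*j] now counts how many of this process's rows-to-zero are owned by process j
     and nprocs[2*j+1] is 1 if anything is sent to j at all.  PetscMaxSum() combines these
     across the communicator so that, roughly speaking, nmax is the largest single message
     this process will receive and nrecvs is the number of processes that will send to it;
     the receive buffer rvalues below is sized accordingly.
  */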
  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
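/*
   MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() above use the standard splitting of a parallel
   AIJ matrix into the diagonal block A (columns owned by this process) and the compressed
   off-diagonal block B (all other columns, addressed through the ghost vector lvec):

       y_local = A * x_local + B * x_ghost

   The forward scatter filling x_ghost = a->lvec is started before the local product so that
   communication overlaps computation.  A minimal illustrative driver (sketch only; the matrix
   C and its sizes are made up) is

      Vec x,y;
      MatGetVecs(C,&x,&y);
      VecSet(x,1.0);
      MatMult(C,x,y);
      VecDestroy(x); VecDestroy(y);
*/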
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me, Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
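/*
   For the transpose products above the roles of the two blocks are reversed: B^T applied to
   the local part of xx produces contributions that belong to other processes (the ghost
   ordering of lvec), and these are accumulated into their owners with a reverse scatter in
   ADD_VALUES mode,

       yy = A^T * xx  +  (scatter-add of B^T * xx from the ghost ordering),

   which is why VecScatterBegin()/VecScatterEnd() are called with SCATTER_REVERSE here rather
   than SCATTER_FORWARD as in MatMult_MPIAIJ().
*/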
/*
    This only works correctly for square matrices where the subblock A->A is the
    diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
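/*
   The routine below writes the matrix in the usual PETSc binary format, funnelled through
   process 0: a four entry header (MAT_FILE_COOKIE, M, N, global number of nonzeros), then
   all row lengths, then all column indices, then all numerical values; each section is sent
   rank by rank to process 0, which performs the actual PetscBinaryWrite() calls.  Within a
   row the off-diagonal entries with global column below cstart are written first, then the
   diagonal block, then the remaining off-diagonal entries, so every row comes out with
   increasing column indices.
*/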
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
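/*
   The relaxation below is purely local (processor-block) SOR: in each outer iteration the
   current iterate is scattered to the ghost points, the right-hand side is corrected by the
   off-diagonal block,

       bb1 = bb - B * x_ghost,

   and a sequential SOR sweep is then applied to the diagonal block with bb1 as right-hand
   side.  Since the off-process coupling is refreshed only once per outer iteration, this is
   block Jacobi between processes with SOR inside each block; the truly parallel variants are
   rejected in the final else branch.
*/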
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
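/*
   Illustrative use of the row access routines below (sketch only): the returned column
   indices are global, sorted, and merged from the diagonal and off-diagonal blocks, and
   only locally owned rows may be requested.

      PetscInt    row,rstart,rend,ncols,*cols;
      PetscScalar *vals;

      MatGetOwnershipRange(C,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(C,row,&ncols,&cols,&vals);
        ... use ncols, cols[], vals[] ...
        MatRestoreRow(C,row,&ncols,&cols,&vals);
      }
*/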
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
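/*
   MatDiagonalScale() for this format computes mat = diag(ll) * mat * diag(rr).  The left
   scaling is local and is applied to both blocks directly; the right scaling of the
   off-diagonal block needs entries of rr owned by other processes, so rr is scattered into
   lvec first (started early so the scatter overlaps the scaling of the diagonal block) and
   the off-diagonal block is right-scaled with the ghosted values once the scatter completes.
*/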
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
If need be, we can provide a more 1569 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1570 then copying the submatrices */ 1571 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1574 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1575 } 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1581 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1582 { 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #include "petscblaslapack.h" 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatAXPY_MPIAIJ" 1593 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1594 { 1595 PetscErrorCode ierr; 1596 PetscInt i; 1597 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1598 PetscBLASInt bnz,one=1; 1599 Mat_SeqAIJ *x,*y; 1600 1601 PetscFunctionBegin; 1602 if (str == SAME_NONZERO_PATTERN) { 1603 PetscScalar alpha = a; 1604 x = (Mat_SeqAIJ *)xx->A->data; 1605 y = (Mat_SeqAIJ *)yy->A->data; 1606 bnz = (PetscBLASInt)x->nz; 1607 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1608 x = (Mat_SeqAIJ *)xx->B->data; 1609 y = (Mat_SeqAIJ *)yy->B->data; 1610 bnz = (PetscBLASInt)x->nz; 1611 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1612 } else if (str == SUBSET_NONZERO_PATTERN) { 1613 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1614 1615 x = (Mat_SeqAIJ *)xx->B->data; 1616 y = (Mat_SeqAIJ *)yy->B->data; 1617 if (y->xtoy && y->XtoY != xx->B) { 1618 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1619 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1620 } 1621 if (!y->xtoy) { /* get xtoy */ 1622 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1623 y->XtoY = xx->B; 1624 } 1625 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1626 } else { 1627 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatConjugate_MPIAIJ" 1636 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1637 { 1638 #if defined(PETSC_USE_COMPLEX) 1639 PetscErrorCode ierr; 1640 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1641 1642 PetscFunctionBegin; 1643 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1644 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1645 #else 1646 PetscFunctionBegin; 1647 #endif 1648 PetscFunctionReturn(0); 1649 } 1650 1651 /* -------------------------------------------------------------------*/ 1652 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1653 MatGetRow_MPIAIJ, 1654 MatRestoreRow_MPIAIJ, 1655 MatMult_MPIAIJ, 1656 /* 4*/ MatMultAdd_MPIAIJ, 1657 MatMultTranspose_MPIAIJ, 1658 MatMultTransposeAdd_MPIAIJ, 1659 0, 1660 0, 1661 0, 1662 /*10*/ 0, 1663 0, 1664 0, 1665 MatRelax_MPIAIJ, 1666 MatTranspose_MPIAIJ, 1667 /*15*/ MatGetInfo_MPIAIJ, 1668 MatEqual_MPIAIJ, 1669 MatGetDiagonal_MPIAIJ, 1670 MatDiagonalScale_MPIAIJ, 1671 MatNorm_MPIAIJ, 1672 /*20*/ MatAssemblyBegin_MPIAIJ, 1673 MatAssemblyEnd_MPIAIJ, 1674 0, 1675 MatSetOption_MPIAIJ, 1676 MatZeroEntries_MPIAIJ, 1677 /*25*/ MatZeroRows_MPIAIJ, 1678 0, 1679 0, 1680 0, 1681 0, 1682 /*30*/ MatSetUpPreallocation_MPIAIJ, 1683 0, 1684 0, 1685 0, 1686 0, 1687 /*35*/ MatDuplicate_MPIAIJ, 1688 0, 1689 0, 1690 0, 1691 
0, 1692 /*40*/ MatAXPY_MPIAIJ, 1693 MatGetSubMatrices_MPIAIJ, 1694 MatIncreaseOverlap_MPIAIJ, 1695 MatGetValues_MPIAIJ, 1696 MatCopy_MPIAIJ, 1697 /*45*/ MatPrintHelp_MPIAIJ, 1698 MatScale_MPIAIJ, 1699 0, 1700 0, 1701 0, 1702 /*50*/ MatSetBlockSize_MPIAIJ, 1703 0, 1704 0, 1705 0, 1706 0, 1707 /*55*/ MatFDColoringCreate_MPIAIJ, 1708 0, 1709 MatSetUnfactored_MPIAIJ, 1710 MatPermute_MPIAIJ, 1711 0, 1712 /*60*/ MatGetSubMatrix_MPIAIJ, 1713 MatDestroy_MPIAIJ, 1714 MatView_MPIAIJ, 1715 MatGetPetscMaps_Petsc, 1716 0, 1717 /*65*/ 0, 1718 0, 1719 0, 1720 0, 1721 0, 1722 /*70*/ 0, 1723 0, 1724 MatSetColoring_MPIAIJ, 1725 #if defined(PETSC_HAVE_ADIC) 1726 MatSetValuesAdic_MPIAIJ, 1727 #else 1728 0, 1729 #endif 1730 MatSetValuesAdifor_MPIAIJ, 1731 /*75*/ 0, 1732 0, 1733 0, 1734 0, 1735 0, 1736 /*80*/ 0, 1737 0, 1738 0, 1739 0, 1740 /*84*/ MatLoad_MPIAIJ, 1741 0, 1742 0, 1743 0, 1744 0, 1745 0, 1746 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1747 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1748 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1749 MatPtAP_Basic, 1750 MatPtAPSymbolic_MPIAIJ, 1751 /*95*/ MatPtAPNumeric_MPIAIJ, 1752 0, 1753 0, 1754 0, 1755 0, 1756 /*100*/0, 1757 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1758 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1759 MatConjugate_MPIAIJ 1760 }; 1761 1762 /* ----------------------------------------------------------------------------------------*/ 1763 1764 EXTERN_C_BEGIN 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1767 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1768 { 1769 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1774 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 EXTERN_C_END 1778 1779 EXTERN_C_BEGIN 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1782 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1783 { 1784 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1789 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 EXTERN_C_END 1793 1794 #include "petscpc.h" 1795 EXTERN_C_BEGIN 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1798 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1799 { 1800 Mat_MPIAIJ *b; 1801 PetscErrorCode ierr; 1802 PetscInt i; 1803 1804 PetscFunctionBegin; 1805 B->preallocated = PETSC_TRUE; 1806 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1807 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1808 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1809 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1810 if (d_nnz) { 1811 for (i=0; i<B->m; i++) { 1812 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1813 } 1814 } 1815 if (o_nnz) { 1816 for (i=0; i<B->m; i++) { 1817 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1818 } 1819 } 1820 b = (Mat_MPIAIJ*)B->data; 1821 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1822 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1823 1824 PetscFunctionReturn(0); 1825 } 1826 
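/*
   Note on the routine above: PETSC_DEFAULT (or PETSC_DECIDE) for d_nz/o_nz falls back
   to 5 and 2 nonzeros per row respectively, and the requested counts are simply
   forwarded to the two sequential blocks (the d_* arguments preallocate b->A, the o_*
   arguments preallocate b->B).  A minimal hypothetical call that relies on those
   defaults is

      ierr = MatMPIAIJSetPreallocation(B,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);

   which is what MatSetUpPreallocation_MPIAIJ() earlier in this file issues when no
   user preallocation was supplied.
*/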
EXTERN_C_END 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1830 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1831 { 1832 Mat mat; 1833 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1834 PetscErrorCode ierr; 1835 1836 PetscFunctionBegin; 1837 *newmat = 0; 1838 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1839 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1840 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1841 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1842 a = (Mat_MPIAIJ*)mat->data; 1843 1844 mat->factor = matin->factor; 1845 mat->bs = matin->bs; 1846 mat->assembled = PETSC_TRUE; 1847 mat->insertmode = NOT_SET_VALUES; 1848 mat->preallocated = PETSC_TRUE; 1849 1850 a->rstart = oldmat->rstart; 1851 a->rend = oldmat->rend; 1852 a->cstart = oldmat->cstart; 1853 a->cend = oldmat->cend; 1854 a->size = oldmat->size; 1855 a->rank = oldmat->rank; 1856 a->donotstash = oldmat->donotstash; 1857 a->roworiented = oldmat->roworiented; 1858 a->rowindices = 0; 1859 a->rowvalues = 0; 1860 a->getrowactive = PETSC_FALSE; 1861 1862 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1863 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1864 if (oldmat->colmap) { 1865 #if defined (PETSC_USE_CTABLE) 1866 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1867 #else 1868 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1869 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1870 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1871 #endif 1872 } else a->colmap = 0; 1873 if (oldmat->garray) { 1874 PetscInt len; 1875 len = oldmat->B->n; 1876 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1877 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1878 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1879 } else a->garray = 0; 1880 1881 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1882 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1883 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1884 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1885 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1886 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1887 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1888 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1889 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1890 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1891 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1892 *newmat = mat; 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #include "petscsys.h" 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatLoad_MPIAIJ" 1900 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1901 { 1902 Mat A; 1903 PetscScalar *vals,*svals; 1904 MPI_Comm comm = ((PetscObject)viewer)->comm; 1905 MPI_Status status; 1906 PetscErrorCode ierr; 1907 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1908 PetscInt i,nz,j,rstart,rend,mmax; 1909 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1910 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1911 PetscInt cend,cstart,n,*rowners; 1912 int fd; 1913 1914 PetscFunctionBegin; 1915 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1916 ierr = 
MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1917 if (!rank) { 1918 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1919 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1920 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1921 } 1922 1923 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1924 M = header[1]; N = header[2]; 1925 /* determine ownership of all rows */ 1926 m = M/size + ((M % size) > rank); 1927 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1928 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1929 1930 /* First process needs enough room for process with most rows */ 1931 if (!rank) { 1932 mmax = rowners[1]; 1933 for (i=2; i<size; i++) { 1934 mmax = PetscMax(mmax,rowners[i]); 1935 } 1936 } else mmax = m; 1937 1938 rowners[0] = 0; 1939 for (i=2; i<=size; i++) { 1940 mmax = PetscMax(mmax,rowners[i]); 1941 rowners[i] += rowners[i-1]; 1942 } 1943 rstart = rowners[rank]; 1944 rend = rowners[rank+1]; 1945 1946 /* distribute row lengths to all processors */ 1947 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1948 if (!rank) { 1949 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1950 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1951 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1952 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1953 for (j=0; j<m; j++) { 1954 procsnz[0] += ourlens[j]; 1955 } 1956 for (i=1; i<size; i++) { 1957 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1958 /* calculate the number of nonzeros on each processor */ 1959 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1960 procsnz[i] += rowlengths[j]; 1961 } 1962 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1963 } 1964 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1965 } else { 1966 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1967 } 1968 1969 if (!rank) { 1970 /* determine max buffer needed and allocate it */ 1971 maxnz = 0; 1972 for (i=0; i<size; i++) { 1973 maxnz = PetscMax(maxnz,procsnz[i]); 1974 } 1975 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1976 1977 /* read in my part of the matrix column indices */ 1978 nz = procsnz[0]; 1979 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1980 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1981 1982 /* read in every one elses and ship off */ 1983 for (i=1; i<size; i++) { 1984 nz = procsnz[i]; 1985 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1986 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1987 } 1988 ierr = PetscFree(cols);CHKERRQ(ierr); 1989 } else { 1990 /* determine buffer space needed for message */ 1991 nz = 0; 1992 for (i=0; i<m; i++) { 1993 nz += ourlens[i]; 1994 } 1995 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1996 1997 /* receive message of column indices*/ 1998 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1999 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2000 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2001 } 2002 2003 /* determine column ownership if matrix is not square */ 2004 if (N != M) { 2005 n = N/size + ((N % size) > rank); 2006 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2007 cstart = cend - n; 2008 } else { 
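/* Square matrix: the columns are owned exactly like the rows, so the "diagonal"
      column range is just the local row range computed above.  (For example, the
      ownership formula M/size + ((M % size) > rank) with M = 10 on 3 processes
      gives ranks 0,1,2 ownership of 4,3,3 rows respectively.) */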
2009 cstart = rstart; 2010 cend = rend; 2011 n = cend - cstart; 2012 } 2013 2014 /* loop over local rows, determining number of off diagonal entries */ 2015 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2016 jj = 0; 2017 for (i=0; i<m; i++) { 2018 for (j=0; j<ourlens[i]; j++) { 2019 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2020 jj++; 2021 } 2022 } 2023 2024 /* create our matrix */ 2025 for (i=0; i<m; i++) { 2026 ourlens[i] -= offlens[i]; 2027 } 2028 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2029 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2030 ierr = MatSetType(A,type);CHKERRQ(ierr); 2031 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2032 2033 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2034 for (i=0; i<m; i++) { 2035 ourlens[i] += offlens[i]; 2036 } 2037 2038 if (!rank) { 2039 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2040 2041 /* read in my part of the matrix numerical values */ 2042 nz = procsnz[0]; 2043 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2044 2045 /* insert into matrix */ 2046 jj = rstart; 2047 smycols = mycols; 2048 svals = vals; 2049 for (i=0; i<m; i++) { 2050 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2051 smycols += ourlens[i]; 2052 svals += ourlens[i]; 2053 jj++; 2054 } 2055 2056 /* read in other processors and ship out */ 2057 for (i=1; i<size; i++) { 2058 nz = procsnz[i]; 2059 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2060 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2061 } 2062 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2063 } else { 2064 /* receive numeric values */ 2065 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2066 2067 /* receive message of values*/ 2068 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2069 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2070 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2071 2072 /* insert into matrix */ 2073 jj = rstart; 2074 smycols = mycols; 2075 svals = vals; 2076 for (i=0; i<m; i++) { 2077 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2078 smycols += ourlens[i]; 2079 svals += ourlens[i]; 2080 jj++; 2081 } 2082 } 2083 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2084 ierr = PetscFree(vals);CHKERRQ(ierr); 2085 ierr = PetscFree(mycols);CHKERRQ(ierr); 2086 ierr = PetscFree(rowners);CHKERRQ(ierr); 2087 2088 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 *newmat = A; 2091 PetscFunctionReturn(0); 2092 } 2093 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2096 /* 2097 Not great since it makes two copies of the submatrix, first an SeqAIJ 2098 in local and then by concatenating the local matrices the end result. 
2099 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2100 */ 2101 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2102 { 2103 PetscErrorCode ierr; 2104 PetscMPIInt rank,size; 2105 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2106 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2107 Mat *local,M,Mreuse; 2108 PetscScalar *vwork,*aa; 2109 MPI_Comm comm = mat->comm; 2110 Mat_SeqAIJ *aij; 2111 2112 2113 PetscFunctionBegin; 2114 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2115 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2116 2117 if (call == MAT_REUSE_MATRIX) { 2118 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2119 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2120 local = &Mreuse; 2121 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2122 } else { 2123 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2124 Mreuse = *local; 2125 ierr = PetscFree(local);CHKERRQ(ierr); 2126 } 2127 2128 /* 2129 m - number of local rows 2130 n - number of columns (same on all processors) 2131 rstart - first row in new global matrix generated 2132 */ 2133 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2134 if (call == MAT_INITIAL_MATRIX) { 2135 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2136 ii = aij->i; 2137 jj = aij->j; 2138 2139 /* 2140 Determine the number of non-zeros in the diagonal and off-diagonal 2141 portions of the matrix in order to do correct preallocation 2142 */ 2143 2144 /* first get start and end of "diagonal" columns */ 2145 if (csize == PETSC_DECIDE) { 2146 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2147 if (mglobal == n) { /* square matrix */ 2148 nlocal = m; 2149 } else { 2150 nlocal = n/size + ((n % size) > rank); 2151 } 2152 } else { 2153 nlocal = csize; 2154 } 2155 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2156 rstart = rend - nlocal; 2157 if (rank == size - 1 && rend != n) { 2158 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2159 } 2160 2161 /* next, compute all the lengths */ 2162 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2163 olens = dlens + m; 2164 for (i=0; i<m; i++) { 2165 jend = ii[i+1] - ii[i]; 2166 olen = 0; 2167 dlen = 0; 2168 for (j=0; j<jend; j++) { 2169 if (*jj < rstart || *jj >= rend) olen++; 2170 else dlen++; 2171 jj++; 2172 } 2173 olens[i] = olen; 2174 dlens[i] = dlen; 2175 } 2176 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2177 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2178 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2179 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2180 ierr = PetscFree(dlens);CHKERRQ(ierr); 2181 } else { 2182 PetscInt ml,nl; 2183 2184 M = *newmat; 2185 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2186 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2187 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2188 /* 2189 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2190 rather than the slower MatSetValues(). 
2191 */ 2192 M->was_assembled = PETSC_TRUE; 2193 M->assembled = PETSC_FALSE; 2194 } 2195 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2196 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2197 ii = aij->i; 2198 jj = aij->j; 2199 aa = aij->a; 2200 for (i=0; i<m; i++) { 2201 row = rstart + i; 2202 nz = ii[i+1] - ii[i]; 2203 cwork = jj; jj += nz; 2204 vwork = aa; aa += nz; 2205 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2206 } 2207 2208 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2209 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2210 *newmat = M; 2211 2212 /* save submatrix used in processor for next request */ 2213 if (call == MAT_INITIAL_MATRIX) { 2214 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2215 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2216 } 2217 2218 PetscFunctionReturn(0); 2219 } 2220 2221 EXTERN_C_BEGIN 2222 #undef __FUNCT__ 2223 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2224 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2225 { 2226 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2227 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2228 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2229 const PetscInt *JJ; 2230 PetscScalar *values; 2231 PetscErrorCode ierr; 2232 2233 PetscFunctionBegin; 2234 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2235 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2236 o_nnz = d_nnz + m; 2237 2238 for (i=0; i<m; i++) { 2239 nnz = I[i+1]- I[i]; 2240 JJ = J + I[i]; 2241 nnz_max = PetscMax(nnz_max,nnz); 2242 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2243 for (j=0; j<nnz; j++) { 2244 if (*JJ >= cstart) break; 2245 JJ++; 2246 } 2247 d = 0; 2248 for (; j<nnz; j++) { 2249 if (*JJ++ >= cend) break; 2250 d++; 2251 } 2252 d_nnz[i] = d; 2253 o_nnz[i] = nnz - d; 2254 } 2255 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2256 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2257 2258 if (v) values = (PetscScalar*)v; 2259 else { 2260 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2261 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2262 } 2263 2264 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2265 for (i=0; i<m; i++) { 2266 ii = i + rstart; 2267 nnz = I[i+1]- I[i]; 2268 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2269 } 2270 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2271 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2272 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2273 2274 if (!v) { 2275 ierr = PetscFree(values);CHKERRQ(ierr); 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 EXTERN_C_END 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2283 /*@C 2284 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2285 (the default parallel PETSc format). 2286 2287 Collective on MPI_Comm 2288 2289 Input Parameters: 2290 + B - the matrix 2291 . i - the indices into j for the start of each local row (starts with zero) 2292 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2293 - v - optional values in the matrix 2294 2295 Level: developer 2296 2297 .keywords: matrix, aij, compressed row, sparse, parallel 2298 2299 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2300 @*/ 2301 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2302 { 2303 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2304 2305 PetscFunctionBegin; 2306 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2307 if (f) { 2308 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2309 } 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2315 /*@C 2316 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2317 (the default parallel PETSc format). For good matrix assembly performance 2318 the user should preallocate the matrix storage by setting the parameters 2319 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2320 performance can be increased by more than a factor of 50. 2321 2322 Collective on MPI_Comm 2323 2324 Input Parameters: 2325 + A - the matrix 2326 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2327 (same value is used for all local rows) 2328 . d_nnz - array containing the number of nonzeros in the various rows of the 2329 DIAGONAL portion of the local submatrix (possibly different for each row) 2330 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2331 The size of this array is equal to the number of local rows, i.e 'm'. 2332 You must leave room for the diagonal entry even if it is zero. 2333 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2334 submatrix (same value is used for all local rows). 2335 - o_nnz - array containing the number of nonzeros in the various rows of the 2336 OFF-DIAGONAL portion of the local submatrix (possibly different for 2337 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2338 structure. The size of this array is equal to the number 2339 of local rows, i.e 'm'. 2340 2341 If the *_nnz parameter is given then the *_nz parameter is ignored 2342 2343 The AIJ format (also called the Yale sparse matrix format or 2344 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2345 storage. The stored row and column indices begin with zero. See the users manual for details. 2346 2347 The parallel matrix is partitioned such that the first m0 rows belong to 2348 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2349 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2350 2351 The DIAGONAL portion of the local submatrix of a processor can be defined 2352 as the submatrix which is obtained by extraction the part corresponding 2353 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2354 first row that belongs to the processor, and r2 is the last row belonging 2355 to the this processor. This is a square mxm matrix. The remaining portion 2356 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2357 2358 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
2359 2360 Example usage: 2361 2362 Consider the following 8x8 matrix with 34 non-zero values, that is 2363 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2364 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2365 as follows: 2366 2367 .vb 2368 1 2 0 | 0 3 0 | 0 4 2369 Proc0 0 5 6 | 7 0 0 | 8 0 2370 9 0 10 | 11 0 0 | 12 0 2371 ------------------------------------- 2372 13 0 14 | 15 16 17 | 0 0 2373 Proc1 0 18 0 | 19 20 21 | 0 0 2374 0 0 0 | 22 23 0 | 24 0 2375 ------------------------------------- 2376 Proc2 25 26 27 | 0 0 28 | 29 0 2377 30 0 0 | 31 32 33 | 0 34 2378 .ve 2379 2380 This can be represented as a collection of submatrices as: 2381 2382 .vb 2383 A B C 2384 D E F 2385 G H I 2386 .ve 2387 2388 Where the submatrices A,B,C are owned by proc0, D,E,F are 2389 owned by proc1, G,H,I are owned by proc2. 2390 2391 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2392 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2393 The 'M','N' parameters are 8,8, and have the same values on all procs. 2394 2395 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2396 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2397 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2398 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2399 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2400 matrix, and [DF] as another SeqAIJ matrix. 2401 2402 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2403 allocated for every row of the local diagonal submatrix, and o_nz 2404 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2405 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2406 row in the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 2407 In this case, the values of d_nz,o_nz are: 2408 .vb 2409 proc0 : d_nz = 2, o_nz = 2 2410 proc1 : d_nz = 3, o_nz = 2 2411 proc2 : d_nz = 1, o_nz = 4 2412 .ve 2413 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2414 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2415 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2416 34 values. 2417 2418 When the d_nnz, o_nnz parameters are specified, the storage is specified 2419 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2420 In the above case the values for d_nnz,o_nnz are: 2421 .vb 2422 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2423 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2424 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2425 .ve 2426 Here the space allocated is the sum of all of the above values, i.e. 34, and 2427 hence the preallocation is exact.
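   As a sketch, the matching preallocation call on proc1 for the example above could
   look like the following (the matrix name A and the error checking are illustrative
   only):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve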
2428 2429 Level: intermediate 2430 2431 .keywords: matrix, aij, compressed row, sparse, parallel 2432 2433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2434 MPIAIJ 2435 @*/ 2436 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2437 { 2438 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2439 2440 PetscFunctionBegin; 2441 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2442 if (f) { 2443 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2444 } 2445 PetscFunctionReturn(0); 2446 } 2447 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatCreateMPIAIJ" 2450 /*@C 2451 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2452 (the default parallel PETSc format). For good matrix assembly performance 2453 the user should preallocate the matrix storage by setting the parameters 2454 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2455 performance can be increased by more than a factor of 50. 2456 2457 Collective on MPI_Comm 2458 2459 Input Parameters: 2460 + comm - MPI communicator 2461 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2462 This value should be the same as the local size used in creating the 2463 y vector for the matrix-vector product y = Ax. 2464 . n - This value should be the same as the local size used in creating the 2465 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2466 calculated if N is given) For square matrices n is almost always m. 2467 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2468 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2469 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2470 (same value is used for all local rows) 2471 . d_nnz - array containing the number of nonzeros in the various rows of the 2472 DIAGONAL portion of the local submatrix (possibly different for each row) 2473 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2474 The size of this array is equal to the number of local rows, i.e 'm'. 2475 You must leave room for the diagonal entry even if it is zero. 2476 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2477 submatrix (same value is used for all local rows). 2478 - o_nnz - array containing the number of nonzeros in the various rows of the 2479 OFF-DIAGONAL portion of the local submatrix (possibly different for 2480 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2481 structure. The size of this array is equal to the number 2482 of local rows, i.e 'm'. 2483 2484 Output Parameter: 2485 . A - the matrix 2486 2487 Notes: 2488 If the *_nnz parameter is given then the *_nz parameter is ignored 2489 2490 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2491 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2492 storage requirements for this matrix. 2493 2494 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2495 processor than it must be used on all processors that share the object for 2496 that argument. 2497 2498 The user MUST specify either the local or global matrix dimensions 2499 (possibly both). 
2500 2501 The parallel matrix is partitioned such that the first m0 rows belong to 2502 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2503 to process 2, etc., where m0,m1,m2... are the input parameter 'm' on each process. 2504 2505 The DIAGONAL portion of the local submatrix of a processor can be defined 2506 as the submatrix that is obtained by extracting the part corresponding 2507 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2508 first row that belongs to the processor, and r2 is the last row belonging 2509 to this processor. This is a square mxm matrix. The remaining portion 2510 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2511 2512 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2513 2514 When calling this routine with a single process communicator, a matrix of 2515 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2516 type of communicator, use the construction mechanism: 2517 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2518 2519 By default, this format uses inodes (identical nodes) when possible. 2520 We search for consecutive rows with the same nonzero structure, thereby 2521 reusing matrix information to achieve increased efficiency. 2522 2523 Options Database Keys: 2524 + -mat_no_inode - Do not use inodes 2525 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2526 - -mat_aij_oneindex - Internally use indexing starting at 1 2527 rather than 0. Note that when calling MatSetValues(), 2528 the user still MUST index entries starting at 0! 2529 2530 2531 Example usage: 2532 2533 Consider the following 8x8 matrix with 34 non-zero values, that is 2534 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2535 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2536 as follows: 2537 2538 .vb 2539 1 2 0 | 0 3 0 | 0 4 2540 Proc0 0 5 6 | 7 0 0 | 8 0 2541 9 0 10 | 11 0 0 | 12 0 2542 ------------------------------------- 2543 13 0 14 | 15 16 17 | 0 0 2544 Proc1 0 18 0 | 19 20 21 | 0 0 2545 0 0 0 | 22 23 0 | 24 0 2546 ------------------------------------- 2547 Proc2 25 26 27 | 0 0 28 | 29 0 2548 30 0 0 | 31 32 33 | 0 34 2549 .ve 2550 2551 This can be represented as a collection of submatrices as: 2552 2553 .vb 2554 A B C 2555 D E F 2556 G H I 2557 .ve 2558 2559 Where the submatrices A,B,C are owned by proc0, D,E,F are 2560 owned by proc1, G,H,I are owned by proc2. 2561 2562 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2563 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2564 The 'M','N' parameters are 8,8, and have the same values on all procs. 2565 2566 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2567 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2568 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2569 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2570 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2571 matrix, and [DF] as another SeqAIJ matrix. 2572 2573 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2574 allocated for every row of the local diagonal submatrix, and o_nz 2575 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2576 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2577 row in the local DIAGONAL and OFF-DIAGONAL submatrices respectively.
2578 In this case, the values of d_nz,o_nz are: 2579 .vb 2580 proc0 : dnz = 2, o_nz = 2 2581 proc1 : dnz = 3, o_nz = 2 2582 proc2 : dnz = 1, o_nz = 4 2583 .ve 2584 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2585 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2586 for proc3. i.e we are using 12+15+10=37 storage locations to store 2587 34 values. 2588 2589 When d_nnz, o_nnz parameters are specified, the storage is specified 2590 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2591 In the above case the values for d_nnz,o_nnz are: 2592 .vb 2593 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2594 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2595 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2596 .ve 2597 Here the space allocated is sum of all the above values i.e 34, and 2598 hence pre-allocation is perfect. 2599 2600 Level: intermediate 2601 2602 .keywords: matrix, aij, compressed row, sparse, parallel 2603 2604 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2605 MPIAIJ 2606 @*/ 2607 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2608 { 2609 PetscErrorCode ierr; 2610 PetscMPIInt size; 2611 2612 PetscFunctionBegin; 2613 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2614 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2616 if (size > 1) { 2617 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2618 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2619 } else { 2620 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2621 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2622 } 2623 PetscFunctionReturn(0); 2624 } 2625 2626 #undef __FUNCT__ 2627 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2628 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2629 { 2630 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2631 2632 PetscFunctionBegin; 2633 *Ad = a->A; 2634 *Ao = a->B; 2635 *colmap = a->garray; 2636 PetscFunctionReturn(0); 2637 } 2638 2639 #undef __FUNCT__ 2640 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2641 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2642 { 2643 PetscErrorCode ierr; 2644 PetscInt i; 2645 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2646 2647 PetscFunctionBegin; 2648 if (coloring->ctype == IS_COLORING_LOCAL) { 2649 ISColoringValue *allcolors,*colors; 2650 ISColoring ocoloring; 2651 2652 /* set coloring for diagonal portion */ 2653 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2654 2655 /* set coloring for off-diagonal portion */ 2656 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2657 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2658 for (i=0; i<a->B->n; i++) { 2659 colors[i] = allcolors[a->garray[i]]; 2660 } 2661 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2662 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2663 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2664 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2665 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2666 ISColoringValue *colors; 2667 PetscInt *larray; 2668 ISColoring ocoloring; 2669 2670 /* set coloring for diagonal portion */ 2671 ierr = 
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2672 for (i=0; i<a->A->n; i++) { 2673 larray[i] = i + a->cstart; 2674 } 2675 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2676 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2677 for (i=0; i<a->A->n; i++) { 2678 colors[i] = coloring->colors[larray[i]]; 2679 } 2680 ierr = PetscFree(larray);CHKERRQ(ierr); 2681 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2682 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2683 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2684 2685 /* set coloring for off-diagonal portion */ 2686 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2687 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2688 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2689 for (i=0; i<a->B->n; i++) { 2690 colors[i] = coloring->colors[larray[i]]; 2691 } 2692 ierr = PetscFree(larray);CHKERRQ(ierr); 2693 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2694 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2695 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2696 } else { 2697 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2698 } 2699 2700 PetscFunctionReturn(0); 2701 } 2702 2703 #if defined(PETSC_HAVE_ADIC) 2704 #undef __FUNCT__ 2705 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2706 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2707 { 2708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2713 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2714 PetscFunctionReturn(0); 2715 } 2716 #endif 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2720 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2721 { 2722 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2727 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatMerge" 2733 /*@C 2734 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 2735 matrices supplied by each processor 2736 2737 Collective on MPI_Comm 2738 2739 Input Parameters: 2740 + comm - the communicator that the parallel matrix will live on 2741 . inmat - the input sequential matrix (one per process) 2742 . n - number of local columns (or PETSC_DECIDE) 2743 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2744 2745 Output Parameter: 2746 . outmat - the parallel matrix generated 2747 2748 Level: advanced 2749 2750 Notes: The number of columns of the matrix on EACH processor MUST be the same.
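       This routine calls MatDestroy() on inmat once its rows have been copied into
       outmat, so the caller should not destroy inmat again.  A hypothetical call
       (the matrix names are illustrative only) is
.vb
       ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve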
2751 2752 @*/ 2753 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2754 { 2755 PetscErrorCode ierr; 2756 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2757 PetscInt *indx; 2758 PetscScalar *values; 2759 PetscMap columnmap,rowmap; 2760 2761 PetscFunctionBegin; 2762 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2763 /* 2764 PetscMPIInt rank; 2765 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2766 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2767 */ 2768 if (scall == MAT_INITIAL_MATRIX){ 2769 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2770 if (n == PETSC_DECIDE){ 2771 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2772 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2773 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2774 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2775 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2776 } 2777 2778 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2779 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2780 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2781 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2782 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2783 2784 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2785 for (i=0;i<m;i++) { 2786 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2787 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2788 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2789 } 2790 /* This routine will ONLY return MPIAIJ type matrix */ 2791 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2792 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2793 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2794 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2795 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2796 2797 } else if (scall == MAT_REUSE_MATRIX){ 2798 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2799 } else { 2800 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2801 } 2802 2803 for (i=0;i<m;i++) { 2804 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2805 I = i + rstart; 2806 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2807 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2808 } 2809 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2810 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 2813 PetscFunctionReturn(0); 2814 } 2815 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatFileSplit" 2818 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2819 { 2820 PetscErrorCode ierr; 2821 PetscMPIInt rank; 2822 PetscInt m,N,i,rstart,nnz; 2823 size_t len; 2824 const PetscInt *indx; 2825 PetscViewer out; 2826 char *name; 2827 Mat B; 2828 const PetscScalar *values; 2829 2830 PetscFunctionBegin; 2831 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2832 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2833 /* Should this be the type of the diagonal block of A? 
*/ 2834 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2835 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2836 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2837 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2838 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2839 for (i=0;i<m;i++) { 2840 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2841 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2842 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2843 } 2844 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2845 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2846 2847 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2848 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2849 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2850 sprintf(name,"%s.%d",outfile,rank); 2851 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2852 ierr = PetscFree(name);CHKERRQ(ierr); 2853 ierr = MatView(B,out);CHKERRQ(ierr); 2854 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2855 ierr = MatDestroy(B);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2862 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2863 { 2864 PetscErrorCode ierr; 2865 Mat_Merge_SeqsToMPI *merge; 2866 PetscObjectContainer container; 2867 2868 PetscFunctionBegin; 2869 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2870 if (container) { 2871 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2872 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2873 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2874 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2875 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2876 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2877 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2878 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2879 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2880 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2881 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2882 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2883 2884 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2885 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2886 ierr = PetscFree(merge);CHKERRQ(ierr); /* free merge only when it was actually retrieved from the container */ 2887 } 2888 2889 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #include "src/mat/utils/freespace.h" 2894 #include "petscbt.h" 2895 static PetscEvent logkey_seqstompinum = 0; 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2898 /*@C 2899 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential 2900 matrices from each processor 2901 2902 Collective on MPI_Comm 2903 2904 Input Parameters: 2905 + comm - the communicator that the parallel matrix will live on 2906 . seqmat - the input sequential matrix (one per process) 2907 . m - number of local rows (or PETSC_DECIDE) 2908 . n - number of local columns (or PETSC_DECIDE) 2909 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2910 2911 Output Parameter: 2912 . mpimat - the parallel matrix generated 2913 2914 Level: advanced 2915 2916 Notes: 2917 The dimensions of the sequential matrix on each processor MUST be the same.
2918 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2919 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2920 @*/ 2921 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2922 { 2923 PetscErrorCode ierr; 2924 MPI_Comm comm=mpimat->comm; 2925 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2926 PetscMPIInt size,rank,taga,*len_s; 2927 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2928 PetscInt proc,m; 2929 PetscInt **buf_ri,**buf_rj; 2930 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2931 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2932 MPI_Request *s_waits,*r_waits; 2933 MPI_Status *status; 2934 MatScalar *aa=a->a,**abuf_r,*ba_i; 2935 Mat_Merge_SeqsToMPI *merge; 2936 PetscObjectContainer container; 2937 2938 PetscFunctionBegin; 2939 if (!logkey_seqstompinum) { 2940 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2941 } 2942 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2943 2944 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2945 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2946 2947 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2948 if (container) { 2949 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2950 } 2951 bi = merge->bi; 2952 bj = merge->bj; 2953 buf_ri = merge->buf_ri; 2954 buf_rj = merge->buf_rj; 2955 2956 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2957 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2958 len_s = merge->len_s; 2959 2960 /* send and recv matrix values */ 2961 /*-----------------------------*/ 2962 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2963 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2964 2965 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2966 for (proc=0,k=0; proc<size; proc++){ 2967 if (!len_s[proc]) continue; 2968 i = owners[proc]; 2969 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2970 k++; 2971 } 2972 2973 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 2974 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 2975 ierr = PetscFree(status);CHKERRQ(ierr); 2976 2977 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2978 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2979 2980 /* insert mat values of mpimat */ 2981 /*----------------------------*/ 2982 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2983 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2984 nextrow = buf_ri_k + merge->nrecv; 2985 nextai = nextrow + merge->nrecv; 2986 2987 for (k=0; k<merge->nrecv; k++){ 2988 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2989 nrows = *(buf_ri_k[k]); 2990 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2991 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2992 } 2993 2994 /* set values of ba */ 2995 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2996 for (i=0; i<m; i++) { 2997 arow = owners[rank] + i; 2998 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2999 bnzi = bi[i+1] - bi[i]; 3000 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 
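/* The two accumulation loops below rely on sorted column indices: bj_i (the
         assembled row structure of mpimat) is scanned forward with j, and aj advances
         only on a match, so every local or received entry is assumed to occur
         somewhere in bj_i. */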
3001 3002 /* add local non-zero vals of this proc's seqmat into ba */ 3003 anzi = ai[arow+1] - ai[arow]; 3004 aj = a->j + ai[arow]; 3005 aa = a->a + ai[arow]; 3006 nextaj = 0; 3007 for (j=0; nextaj<anzi; j++){ 3008 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3009 ba_i[j] += aa[nextaj++]; 3010 } 3011 } 3012 3013 /* add received vals into ba */ 3014 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3015 /* i-th row */ 3016 if (i == *nextrow[k]) { 3017 anzi = *(nextai[k]+1) - *nextai[k]; 3018 aj = buf_rj[k] + *(nextai[k]); 3019 aa = abuf_r[k] + *(nextai[k]); 3020 nextaj = 0; 3021 for (j=0; nextaj<anzi; j++){ 3022 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3023 ba_i[j] += aa[nextaj++]; 3024 } 3025 } 3026 nextrow[k]++; nextai[k]++; 3027 } 3028 } 3029 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3030 } 3031 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 3034 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3035 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3036 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3037 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3038 PetscFunctionReturn(0); 3039 } 3040 3041 static PetscEvent logkey_seqstompisym = 0; 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3044 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3045 { 3046 PetscErrorCode ierr; 3047 Mat B_mpi; 3048 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3049 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3050 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3051 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3052 PetscInt len,proc,*dnz,*onz; 3053 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3054 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3055 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3056 MPI_Status *status; 3057 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3058 PetscBT lnkbt; 3059 Mat_Merge_SeqsToMPI *merge; 3060 PetscObjectContainer container; 3061 3062 PetscFunctionBegin; 3063 if (!logkey_seqstompisym) { 3064 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3065 } 3066 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3067 3068 /* make sure it is a PETSc comm */ 3069 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3070 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3071 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3072 3073 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3074 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3075 3076 /* determine row ownership */ 3077 /*---------------------------------------------------------*/ 3078 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3079 if (m == PETSC_DECIDE) { 3080 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3081 } else { 3082 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3083 } 3084 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3085 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3086 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3087 3088 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3089 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3090 3091 /* determine the 
number of messages to send, their lengths */ 3092 /*---------------------------------------------------------*/ 3093 len_s = merge->len_s; 3094 3095 len = 0; /* length of buf_si[] */ 3096 merge->nsend = 0; 3097 for (proc=0; proc<size; proc++){ 3098 len_si[proc] = 0; 3099 if (proc == rank){ 3100 len_s[proc] = 0; 3101 } else { 3102 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3103 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3104 } 3105 if (len_s[proc]) { 3106 merge->nsend++; 3107 nrows = 0; 3108 for (i=owners[proc]; i<owners[proc+1]; i++){ 3109 if (ai[i+1] > ai[i]) nrows++; 3110 } 3111 len_si[proc] = 2*(nrows+1); 3112 len += len_si[proc]; 3113 } 3114 } 3115 3116 /* determine the number and length of messages to receive for ij-structure */ 3117 /*-------------------------------------------------------------------------*/ 3118 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3119 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3120 3121 /* post the Irecv of j-structure */ 3122 /*-------------------------------*/ 3123 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3124 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3125 3126 /* post the Isend of j-structure */ 3127 /*--------------------------------*/ 3128 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3129 sj_waits = si_waits + merge->nsend; 3130 3131 for (proc=0, k=0; proc<size; proc++){ 3132 if (!len_s[proc]) continue; 3133 i = owners[proc]; 3134 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3135 k++; 3136 } 3137 3138 /* receives and sends of j-structure are complete */ 3139 /*------------------------------------------------*/ 3140 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3141 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3142 3143 /* send and recv i-structure */ 3144 /*---------------------------*/ 3145 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3146 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3147 3148 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3149 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3150 for (proc=0,k=0; proc<size; proc++){ 3151 if (!len_s[proc]) continue; 3152 /* form outgoing message for i-structure: 3153 buf_si[0]: nrows to be sent 3154 [1:nrows]: row index (global) 3155 [nrows+1:2*nrows+1]: i-structure index 3156 */ 3157 /*-------------------------------------------*/ 3158 nrows = len_si[proc]/2 - 1; 3159 buf_si_i = buf_si + nrows+1; 3160 buf_si[0] = nrows; 3161 buf_si_i[0] = 0; 3162 nrows = 0; 3163 for (i=owners[proc]; i<owners[proc+1]; i++){ 3164 anzi = ai[i+1] - ai[i]; 3165 if (anzi) { 3166 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3167 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3168 nrows++; 3169 } 3170 } 3171 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3172 k++; 3173 buf_si += len_si[proc]; 3174 } 3175 3176 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3177 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3178 3179 ierr = 
PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3180 for (i=0; i<merge->nrecv; i++){
3181 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3182 }
3183
3184 ierr = PetscFree(len_si);CHKERRQ(ierr);
3185 ierr = PetscFree(len_ri);CHKERRQ(ierr);
3186 ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3187 ierr = PetscFree(si_waits);CHKERRQ(ierr);
3188 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3189 ierr = PetscFree(buf_s);CHKERRQ(ierr);
3190 ierr = PetscFree(status);CHKERRQ(ierr);
3191
3192 /* compute a local seq matrix in each processor */
3193 /*----------------------------------------------*/
3194 /* allocate bi array and free space for accumulating nonzero column info */
3195 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3196 bi[0] = 0;
3197
3198 /* create and initialize a linked list */
3199 nlnk = N+1;
3200 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3201
3202 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3203 len = 0;
3204 len = ai[owners[rank+1]] - ai[owners[rank]];
3205 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3206 current_space = free_space;
3207
3208 /* determine symbolic info for each local row */
3209 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3210 nextrow = buf_ri_k + merge->nrecv;
3211 nextai = nextrow + merge->nrecv;
3212 for (k=0; k<merge->nrecv; k++){
3213 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3214 nrows = *buf_ri_k[k];
3215 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
3216 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
3217 }
3218
3219 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3220 len = 0;
3221 for (i=0;i<m;i++) {
3222 bnzi = 0;
3223 /* add local non-zero cols of this proc's seqmat into lnk */
3224 arow = owners[rank] + i;
3225 anzi = ai[arow+1] - ai[arow];
3226 aj = a->j + ai[arow];
3227 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3228 bnzi += nlnk;
3229 /* add received col data into lnk */
3230 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3231 if (i == *nextrow[k]) { /* i-th row */
3232 anzi = *(nextai[k]+1) - *nextai[k];
3233 aj = buf_rj[k] + *nextai[k];
3234 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3235 bnzi += nlnk;
3236 nextrow[k]++; nextai[k]++;
3237 }
3238 }
3239 if (len < bnzi) len = bnzi; /* =max(bnzi) */
3240
3241 /* if free space is not available, make more free space */
3242 if (current_space->local_remaining<bnzi) {
3243 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3244 nspacedouble++;
3245 }
3246 /* copy data into free space, then initialize lnk */
3247 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3248 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3249
3250 current_space->array += bnzi;
3251 current_space->local_used += bnzi;
3252 current_space->local_remaining -= bnzi;
3253
3254 bi[i+1] = bi[i] + bnzi;
3255 }
3256
3257 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3258
3259 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3260 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3261 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3262
3263 /* create symbolic parallel
matrix B_mpi */ 3264 /*---------------------------------------*/ 3265 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3266 if (n==PETSC_DECIDE) { 3267 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3268 } else { 3269 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3270 } 3271 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3272 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3273 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3274 3275 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3276 B_mpi->assembled = PETSC_FALSE; 3277 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3278 merge->bi = bi; 3279 merge->bj = bj; 3280 merge->buf_ri = buf_ri; 3281 merge->buf_rj = buf_rj; 3282 merge->coi = PETSC_NULL; 3283 merge->coj = PETSC_NULL; 3284 merge->owners_co = PETSC_NULL; 3285 3286 /* attach the supporting struct to B_mpi for reuse */ 3287 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3288 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3289 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3290 *mpimat = B_mpi; 3291 3292 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3293 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3294 PetscFunctionReturn(0); 3295 } 3296 3297 static PetscEvent logkey_seqstompi = 0; 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatMerge_SeqsToMPI" 3300 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3301 { 3302 PetscErrorCode ierr; 3303 3304 PetscFunctionBegin; 3305 if (!logkey_seqstompi) { 3306 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3307 } 3308 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3309 if (scall == MAT_INITIAL_MATRIX){ 3310 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3311 } 3312 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3313 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } 3316 static PetscEvent logkey_getlocalmat = 0; 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "MatGetLocalMat" 3319 /*@C 3320 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3321 3322 Not Collective 3323 3324 Input Parameters: 3325 + A - the matrix 3326 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3327 3328 Output Parameter: 3329 . 
A_loc - the local sequential matrix generated 3330 3331 Level: developer 3332 3333 @*/ 3334 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3335 { 3336 PetscErrorCode ierr; 3337 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3338 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3339 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3340 PetscScalar *aa=a->a,*ba=b->a,*ca; 3341 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3342 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3343 3344 PetscFunctionBegin; 3345 if (!logkey_getlocalmat) { 3346 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3347 } 3348 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3349 if (scall == MAT_INITIAL_MATRIX){ 3350 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3351 ci[0] = 0; 3352 for (i=0; i<am; i++){ 3353 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3354 } 3355 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3356 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3357 k = 0; 3358 for (i=0; i<am; i++) { 3359 ncols_o = bi[i+1] - bi[i]; 3360 ncols_d = ai[i+1] - ai[i]; 3361 /* off-diagonal portion of A */ 3362 for (jo=0; jo<ncols_o; jo++) { 3363 col = cmap[*bj]; 3364 if (col >= cstart) break; 3365 cj[k] = col; bj++; 3366 ca[k++] = *ba++; 3367 } 3368 /* diagonal portion of A */ 3369 for (j=0; j<ncols_d; j++) { 3370 cj[k] = cstart + *aj++; 3371 ca[k++] = *aa++; 3372 } 3373 /* off-diagonal portion of A */ 3374 for (j=jo; j<ncols_o; j++) { 3375 cj[k] = cmap[*bj++]; 3376 ca[k++] = *ba++; 3377 } 3378 } 3379 /* put together the new matrix */ 3380 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3381 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3382 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3383 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3384 mat->freedata = PETSC_TRUE; 3385 mat->nonew = 0; 3386 } else if (scall == MAT_REUSE_MATRIX){ 3387 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3388 ci = mat->i; cj = mat->j; ca = mat->a; 3389 for (i=0; i<am; i++) { 3390 /* off-diagonal portion of A */ 3391 ncols_o = bi[i+1] - bi[i]; 3392 for (jo=0; jo<ncols_o; jo++) { 3393 col = cmap[*bj]; 3394 if (col >= cstart) break; 3395 *ca++ = *ba++; bj++; 3396 } 3397 /* diagonal portion of A */ 3398 ncols_d = ai[i+1] - ai[i]; 3399 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3400 /* off-diagonal portion of A */ 3401 for (j=jo; j<ncols_o; j++) { 3402 *ca++ = *ba++; bj++; 3403 } 3404 } 3405 } else { 3406 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3407 } 3408 3409 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3410 PetscFunctionReturn(0); 3411 } 3412 3413 static PetscEvent logkey_getlocalmatcondensed = 0; 3414 #undef __FUNCT__ 3415 #define __FUNCT__ "MatGetLocalMatCondensed" 3416 /*@C 3417 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3418 3419 Not Collective 3420 3421 Input Parameters: 3422 + A - the matrix 3423 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3424 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3425 3426 Output Parameter: 3427 . 
A_loc - the local sequential matrix generated 3428 3429 Level: developer 3430 3431 @*/ 3432 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3433 { 3434 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3435 PetscErrorCode ierr; 3436 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3437 IS isrowa,iscola; 3438 Mat *aloc; 3439 3440 PetscFunctionBegin; 3441 if (!logkey_getlocalmatcondensed) { 3442 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3443 } 3444 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3445 if (!row){ 3446 start = a->rstart; end = a->rend; 3447 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3448 } else { 3449 isrowa = *row; 3450 } 3451 if (!col){ 3452 start = a->cstart; 3453 cmap = a->garray; 3454 nzA = a->A->n; 3455 nzB = a->B->n; 3456 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3457 ncols = 0; 3458 for (i=0; i<nzB; i++) { 3459 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3460 else break; 3461 } 3462 imark = i; 3463 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3464 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3465 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3466 ierr = PetscFree(idx);CHKERRQ(ierr); 3467 } else { 3468 iscola = *col; 3469 } 3470 if (scall != MAT_INITIAL_MATRIX){ 3471 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3472 aloc[0] = *A_loc; 3473 } 3474 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3475 *A_loc = aloc[0]; 3476 ierr = PetscFree(aloc);CHKERRQ(ierr); 3477 if (!row){ 3478 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3479 } 3480 if (!col){ 3481 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3482 } 3483 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3484 PetscFunctionReturn(0); 3485 } 3486 3487 static PetscEvent logkey_GetBrowsOfAcols = 0; 3488 #undef __FUNCT__ 3489 #define __FUNCT__ "MatGetBrowsOfAcols" 3490 /*@C 3491 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3492 3493 Collective on Mat 3494 3495 Input Parameters: 3496 + A,B - the matrices in mpiaij format 3497 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3498 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3499 3500 Output Parameter: 3501 + rowb, colb - index sets of rows and columns of B to extract 3502 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3503 - B_seq - the sequential matrix generated 3504 3505 Level: developer 3506 3507 @*/ 3508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3509 { 3510 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3511 PetscErrorCode ierr; 3512 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3513 IS isrowb,iscolb; 3514 Mat *bseq; 3515 3516 PetscFunctionBegin; 3517 if (a->cstart != b->rstart || a->cend != b->rend){ 3518 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3519 } 3520 if (!logkey_GetBrowsOfAcols) { 3521 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3522 } 3523 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3524 3525 if (scall == MAT_INITIAL_MATRIX){ 3526 start = a->cstart; 3527 cmap = a->garray; 3528 nzA = a->A->n; 3529 nzB = a->B->n; 3530 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3531 ncols = 0; 3532 for (i=0; i<nzB; i++) { /* row < local row index */ 3533 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3534 else break; 3535 } 3536 imark = i; 3537 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3538 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3539 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3540 ierr = PetscFree(idx);CHKERRQ(ierr); 3541 *brstart = imark; 3542 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3543 } else { 3544 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3545 isrowb = *rowb; iscolb = *colb; 3546 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3547 bseq[0] = *B_seq; 3548 } 3549 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3550 *B_seq = bseq[0]; 3551 ierr = PetscFree(bseq);CHKERRQ(ierr); 3552 if (!rowb){ 3553 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3554 } else { 3555 *rowb = isrowb; 3556 } 3557 if (!colb){ 3558 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3559 } else { 3560 *colb = iscolb; 3561 } 3562 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3563 PetscFunctionReturn(0); 3564 } 3565 3566 static PetscEvent logkey_GetBrowsOfAocols = 0; 3567 #undef __FUNCT__ 3568 #define __FUNCT__ "MatGetBrowsOfAoCols" 3569 /*@C 3570 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3571 of the OFF-DIAGONAL portion of local A 3572 3573 Collective on Mat 3574 3575 Input Parameters: 3576 + A,B - the matrices in mpiaij format 3577 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3578 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3579 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3580 3581 Output Parameter: 3582 + B_oth - the sequential matrix generated 3583 3584 Level: developer 3585 3586 @*/ 3587 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3588 { 3589 VecScatter_MPI_General *gen_to,*gen_from; 3590 PetscErrorCode ierr; 3591 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3592 Mat_SeqAIJ *b_oth; 3593 VecScatter ctx=a->Mvctx; 3594 MPI_Comm comm=ctx->comm; 3595 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3596 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3597 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3598 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3599 MPI_Request *rwaits,*swaits; 3600 MPI_Status *sstatus,rstatus; 3601 PetscInt *cols; 3602 PetscScalar *vals; 3603 PetscMPIInt j; 3604 3605 PetscFunctionBegin; 3606 if (a->cstart != b->rstart || a->cend != b->rend){ 3607 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3608 } 3609 if (!logkey_GetBrowsOfAocols) { 3610 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3611 } 3612 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3613 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3614 3615 gen_to = (VecScatter_MPI_General*)ctx->todata; 3616 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3617 rvalues = gen_from->values; /* holds the length of sending row */ 3618 svalues = gen_to->values; /* holds the length of receiving row */ 3619 nrecvs = gen_from->n; 3620 nsends = gen_to->n; 3621 rwaits = gen_from->requests; 3622 swaits = gen_to->requests; 3623 srow = gen_to->indices; /* local row index to be sent */ 3624 rstarts = gen_from->starts; 3625 sstarts = gen_to->starts; 3626 rprocs = gen_from->procs; 3627 sprocs = gen_to->procs; 3628 sstatus = gen_to->sstatus; 3629 3630 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3631 if (scall == MAT_INITIAL_MATRIX){ 3632 /* i-array */ 3633 /*---------*/ 3634 /* post receives */ 3635 for (i=0; i<nrecvs; i++){ 3636 rowlen = (PetscInt*)rvalues + rstarts[i]; 3637 nrows = rstarts[i+1]-rstarts[i]; 3638 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3639 } 3640 3641 /* pack the outgoing message */ 3642 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3643 rstartsj = sstartsj + nsends +1; 3644 sstartsj[0] = 0; rstartsj[0] = 0; 3645 len = 0; /* total length of j or a array to be sent */ 3646 k = 0; 3647 for (i=0; i<nsends; i++){ 3648 rowlen = (PetscInt*)svalues + sstarts[i]; 3649 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3650 for (j=0; j<nrows; j++) { 3651 row = srow[k] + b->rowners[rank]; /* global row idx */ 3652 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3653 len += rowlen[j]; 3654 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3655 k++; 3656 } 3657 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3658 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3659 } 3660 /* recvs and sends of i-array are completed */ 3661 i = nrecvs; 3662 while (i--) { 3663 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3664 }
3665 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3666 /* allocate buffers for sending j and a arrays */
3667 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3668 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3669
3670 /* create i-array of B_oth */
3671 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3672 b_othi[0] = 0;
3673 len = 0; /* total length of j or a array to be received */
3674 k = 0;
3675 for (i=0; i<nrecvs; i++){
3676 rowlen = (PetscInt*)rvalues + rstarts[i];
3677 nrows = rstarts[i+1]-rstarts[i];
3678 for (j=0; j<nrows; j++) {
3679 b_othi[k+1] = b_othi[k] + rowlen[j];
3680 len += rowlen[j]; k++;
3681 }
3682 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3683 }
3684
3685 /* allocate space for j and a arrays of B_oth */
3686 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3687 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3688
3689 /* j-array */
3690 /*---------*/
3691 /* post receives of j-array */
3692 for (i=0; i<nrecvs; i++){
3693 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3694 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3695 }
3696 k = 0;
3697 for (i=0; i<nsends; i++){
3698 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3699 bufJ = bufj+sstartsj[i];
3700 for (j=0; j<nrows; j++) {
3701 row = srow[k++] + b->rowners[rank]; /* global row idx */
3702 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3703 for (l=0; l<ncols; l++){
3704 *bufJ++ = cols[l];
3705 }
3706 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3707 }
3708 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3709 }
3710
3711 /* recvs and sends of j-array are completed */
3712 i = nrecvs;
3713 while (i--) {
3714 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3715 }
3716 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3717 } else if (scall == MAT_REUSE_MATRIX){
3718 sstartsj = *startsj;
3719 rstartsj = sstartsj + nsends +1;
3720 bufa = *bufa_ptr;
3721 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
3722 b_otha = b_oth->a;
3723 } else {
3724 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3725 }
3726
3727 /* a-array */
3728 /*---------*/
3729 /* post receives of a-array */
3730 for (i=0; i<nrecvs; i++){
3731 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3732 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3733 }
3734 k = 0;
3735 for (i=0; i<nsends; i++){
3736 nrows = sstarts[i+1]-sstarts[i];
3737 bufA = bufa+sstartsj[i];
3738 for (j=0; j<nrows; j++) {
3739 row = srow[k++] + b->rowners[rank]; /* global row idx */
3740 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3741 for (l=0; l<ncols; l++){
3742 *bufA++ = vals[l];
3743 }
3744 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3745
3746 }
3747 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3748 }
3749 /* recvs and sends of a-array are completed */
3750 i = nrecvs;
3751 while (i--) {
3752 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3753 }
3754 if (nsends) {ierr = 
MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3755 3756 if (scall == MAT_INITIAL_MATRIX){ 3757 /* put together the new matrix */ 3758 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3759 3760 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3761 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3762 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3763 b_oth->freedata = PETSC_TRUE; 3764 b_oth->nonew = 0; 3765 3766 ierr = PetscFree(bufj);CHKERRQ(ierr); 3767 if (!startsj || !bufa_ptr){ 3768 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3769 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3770 } else { 3771 *startsj = sstartsj; 3772 *bufa_ptr = bufa; 3773 } 3774 } 3775 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3776 3777 PetscFunctionReturn(0); 3778 } 3779 3780 /*MC 3781 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3782 3783 Options Database Keys: 3784 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3785 3786 Level: beginner 3787 3788 .seealso: MatCreateMPIAIJ 3789 M*/ 3790 3791 EXTERN_C_BEGIN 3792 #undef __FUNCT__ 3793 #define __FUNCT__ "MatCreate_MPIAIJ" 3794 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3795 { 3796 Mat_MPIAIJ *b; 3797 PetscErrorCode ierr; 3798 PetscInt i; 3799 PetscMPIInt size; 3800 3801 PetscFunctionBegin; 3802 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3803 3804 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3805 B->data = (void*)b; 3806 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3807 B->factor = 0; 3808 B->bs = 1; 3809 B->assembled = PETSC_FALSE; 3810 B->mapping = 0; 3811 3812 B->insertmode = NOT_SET_VALUES; 3813 b->size = size; 3814 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3815 3816 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3817 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3818 3819 /* the information in the maps duplicates the information computed below, eventually 3820 we should remove the duplicate information that is not contained in the maps */ 3821 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3822 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3823 3824 /* build local table of row and column ownerships */ 3825 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3826 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3827 b->cowners = b->rowners + b->size + 2; 3828 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3829 b->rowners[0] = 0; 3830 for (i=2; i<=b->size; i++) { 3831 b->rowners[i] += b->rowners[i-1]; 3832 } 3833 b->rstart = b->rowners[b->rank]; 3834 b->rend = b->rowners[b->rank+1]; 3835 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3836 b->cowners[0] = 0; 3837 for (i=2; i<=b->size; i++) { 3838 b->cowners[i] += b->cowners[i-1]; 3839 } 3840 b->cstart = b->cowners[b->rank]; 3841 b->cend = b->cowners[b->rank+1]; 3842 3843 /* build cache for off array entries formed */ 3844 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3845 b->donotstash = PETSC_FALSE; 3846 b->colmap = 0; 3847 b->garray = 0; 3848 b->roworiented = PETSC_TRUE; 3849 3850 /* stuff used for matrix vector multiply */ 3851 b->lvec = PETSC_NULL; 3852 
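/* lvec will hold the off-process (ghost) entries of the input vector during MatMult();
   Mvctx is the VecScatter that fills lvec from the distributed vector.  Both are
   created later, during matrix assembly. */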
b->Mvctx = PETSC_NULL; 3853 3854 /* stuff for MatGetRow() */ 3855 b->rowindices = 0; 3856 b->rowvalues = 0; 3857 b->getrowactive = PETSC_FALSE; 3858 3859 /* Explicitly create 2 MATSEQAIJ matrices. */ 3860 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3861 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 3862 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3863 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3864 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3865 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 3866 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3867 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3868 3869 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3870 "MatStoreValues_MPIAIJ", 3871 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3872 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3873 "MatRetrieveValues_MPIAIJ", 3874 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3875 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3876 "MatGetDiagonalBlock_MPIAIJ", 3877 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3878 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3879 "MatIsTranspose_MPIAIJ", 3880 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3881 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3882 "MatMPIAIJSetPreallocation_MPIAIJ", 3883 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3884 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3885 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3886 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3887 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3888 "MatDiagonalScaleLocal_MPIAIJ", 3889 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3890 PetscFunctionReturn(0); 3891 } 3892 EXTERN_C_END 3893
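/*
   The following is a usage sketch only (guarded out of compilation): it shows how a
   caller might drive MatMerge_SeqsToMPI() to stitch each process's SeqAIJ piece into
   a single parallel MPIAIJ matrix, reusing the symbolic data on later calls.  The
   matrix Aloc, the helper name, and the update loop are illustrative assumptions,
   not part of this file.
*/
#if 0
static PetscErrorCode ExampleMergeSeqPieces(MPI_Comm comm,Mat Aloc,PetscInt nsteps)
{
  PetscErrorCode ierr;
  Mat            C;
  PetscInt       step;

  PetscFunctionBegin;
  /* first call: the symbolic phase builds the parallel layout and attaches the
     Mat_Merge_SeqsToMPI container to C, then the numeric phase fills the values */
  ierr = MatMerge_SeqsToMPI(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
  for (step=1; step<nsteps; step++) {
    /* ... update the entries of Aloc without changing its nonzero pattern ... */
    /* later calls: only the numeric phase runs, reusing the attached container */
    ierr = MatMerge_SeqsToMPI(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  }
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif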