#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The type also
  automatically switches over to using inode routines when enough consecutive rows share a
  nonzero structure.

  Level: beginner

.seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
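/*
   A minimal usage sketch for MATAIJ (illustrative only, not part of the
   original source; the sizes and preallocation counts are placeholders).
   Calling both preallocation routines, as recommended above, keeps the code
   correct on any communicator size:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);               takes effect on one process
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);  takes effect on several
*/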
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  /* use the matrix's communicator, not PETSC_COMM_WORLD, so the IS lives on the right processes */
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
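/*
   Sketch of reaching the routine above through the public interface (an
   illustration; MatGetColumnNorms() dispatches to MatGetColumnNorms_MPIAIJ()
   for MPIAIJ matrices, and norms must hold one entry per global column):

     PetscReal *norms;
     PetscInt  n;
     ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
     ierr = PetscMalloc(n*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/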
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {  /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr  = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
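/*
   Usage sketch for MatDistribute_MPIAIJ() (illustrative only): rank 0 holds
   the assembled square SeqAIJ matrix gmat, every rank passes the number of
   local rows m it should own, and gmat is ignored on the other ranks:

     Mat gmat,mat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&mat);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX and an unchanged nonzero pattern only the numerical
   values are shipped from process 0 again:

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&mat);CHKERRQ(ierr);
*/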
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  process stores an integer array whose length is the global number of
  columns) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
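/*
   The two macros above share one search strategy; the hypothetical helper
   below (not used by PETSc, shown only to make the control flow explicit)
   isolates it: remember the last column searched to exploit mostly-increasing
   input, narrow the window by bisection while it holds more than 5 entries,
   then finish with a short linear scan.

     static PetscInt FindColumn_Sketch(const PetscInt *rp,PetscInt nrow,PetscInt col,
                                       PetscInt *lastcol,PetscInt *low,PetscInt *high)
     {
       PetscInt t,i;
       if (col <= *lastcol) *low = 0; else *high = nrow;    reuse bounds from the previous call
       *lastcol = col;
       while (*high - *low > 5) {                           coarse bisection
         t = (*low + *high)/2;
         if (rp[t] > col) *high = t;
         else             *low  = t;
       }
       for (i=*low; i<*high; i++) {                         short linear scan
         if (rp[i] > col) break;
         if (rp[i] == col) return i;                        existing entry found
       }
       return -(i+1);                                       encode the insertion position
     }
*/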
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
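/*
   The caller's view of the routine above (illustrative): locally owned
   entries are placed directly into the diagonal block aij->A or the
   off-diagonal block aij->B via the macros, while entries for rows owned by
   other processes are stashed and only communicated during assembly:

     PetscInt    row,col;        row may be owned by another process
     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/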
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
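/*
   A note on the MPI_BOR reduction above (a sketch of the idea, relying on
   INSERT_VALUES and ADD_VALUES being distinct nonzero bits in the InsertMode
   enumeration): OR-ing the insert modes of all ranks equals
   (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted while others
   added, so a single reduction both validates consistency and propagates the
   common mode to ranks that set no values at all.
*/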
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }
  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, triggering many mallocs and slowing
     things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
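/*
   Typical use of the routine above, e.g. to impose Dirichlet boundary
   conditions (illustrative; rows holds global indices of boundary rows,
   x the boundary values and b the right hand side):

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

   Each listed row is zeroed, its diagonal set to 1.0, and on the owning
   process b[row] is replaced by 1.0*x[row], as done in the fix-up loop above.
*/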
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m  = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
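/*
   The ordering inside MatMult_MPIAIJ() above is deliberate: it overlaps
   communication with computation. Schematically (A_d = a->A, A_o = a->B):

     VecScatterBegin(...)         start gathering ghost values of x
     y  = A_d * x                 local product while messages are in flight
     VecScatterEnd(...)           complete the gather
     y += A_o * x_ghost           add the off-diagonal (ghost) contribution
*/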
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
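/*
   Usage sketch for the transpose test above via the public interface
   (illustrative):

     PetscBool f;
     ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);

   f is PETSC_TRUE when B equals the transpose of A to within the given
   tolerance; the diagonal blocks are compared first as a cheap rejection
   test before the off-diagonal submatrices are extracted and compared.
*/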
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock
  A->A is the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);
  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
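/*
   The routine above is reached through the generic MatView() with a binary
   viewer; a typical caller (illustrative) is

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The resulting file holds, in order: the MAT_FILE_CLASSID header with the
   global sizes and total nonzero count, then all row lengths, column indices,
   and numerical values.
*/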
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor */
*/ 1494 Mat A; 1495 Mat_SeqAIJ *Aloc; 1496 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1497 MatScalar *a; 1498 1499 if (mat->rmap->N > 1024) { 1500 PetscBool flg = PETSC_FALSE; 1501 1502 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1503 if (!flg) { 1504 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1505 } 1506 } 1507 1508 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1509 if (!rank) { 1510 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1511 } else { 1512 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1513 } 1514 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1515 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1516 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1517 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1518 1519 /* copy over the A part */ 1520 Aloc = (Mat_SeqAIJ*)aij->A->data; 1521 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1522 row = mat->rmap->rstart; 1523 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1524 for (i=0; i<m; i++) { 1525 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1526 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1527 } 1528 aj = Aloc->j; 1529 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1530 1531 /* copy over the B part */ 1532 Aloc = (Mat_SeqAIJ*)aij->B->data; 1533 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1534 row = mat->rmap->rstart; 1535 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1536 ct = cols; 1537 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1538 for (i=0; i<m; i++) { 1539 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1540 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1541 } 1542 ierr = PetscFree(ct);CHKERRQ(ierr); 1543 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1544 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1545 /* 1546 Everyone has to call to draw the matrix since the graphics waits are 1547 synchronized across all processors that share the PetscDraw object 1548 */ 1549 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1550 if (!rank) { 1551 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1552 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1553 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1554 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1555 } 1556 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1557 ierr = MatDestroy(&A);CHKERRQ(ierr); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatView_MPIAIJ" 1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1565 { 1566 PetscErrorCode ierr; 1567 PetscBool iascii,isdraw,issocket,isbinary; 1568 1569 PetscFunctionBegin; 1570 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1571 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1572 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1573 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1574 if (iascii || isdraw || isbinary || issocket) { 1575 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1576 } else { 1577 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatSOR_MPIAIJ" 1584 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1585 { 1586 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1587 PetscErrorCode ierr; 1588 Vec bb1 = 0; 1589 PetscBool hasop; 1590 1591 PetscFunctionBegin; 1592 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1593 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1594 } 1595 1596 if (flag == SOR_APPLY_UPPER) { 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1602 if (flag & SOR_ZERO_INITIAL_GUESS) { 1603 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1604 its--; 1605 } 1606 1607 while (its--) { 1608 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 1611 /* update rhs: bb1 = bb - B*x */ 1612 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1613 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1617 } 1618 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1619 if (flag & SOR_ZERO_INITIAL_GUESS) { 1620 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1621 its--; 1622 } 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_EISENSTAT) { 1651 Vec xx1; 1652 1653 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1654 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1655 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 if (!mat->diag) { 1659 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1660 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1661 } 1662 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1663 if (hasop) { 1664 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1665 } else { 1666 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1667 } 1668 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1669 1670 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1671 1672 /* local sweep */ 1673 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1674 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1675 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1676 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1677 1678 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatPermute_MPIAIJ" 1684 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1685 { 1686 MPI_Comm comm,pcomm; 1687 PetscInt first,local_size,nrows; 1688 const PetscInt *rows; 1689 PetscMPIInt size; 1690 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1695 /* make a collective version of 'rowp' */ 1696 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1697 if (pcomm==comm) { 1698 crowp = rowp; 1699 } else { 1700 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1701 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1702 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1703 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1704 } 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 if (pcomm!=comm) { 1709 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1710 } 1711 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1712 /* get the local target indices */ 1713 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1714 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1715 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1716 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1717 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1718 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1719 /* the column permutation is so much easier; 1720 make a local version of 'colp' and invert it */ 1721 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1722 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1723 if (size==1) { 1724 lcolp = colp; 1725 } else { 1726 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1727 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1728 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1729 } 1730 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1731 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1732 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1733 if (size>1) { 1734 ierr = 
ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1735 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1736 } 1737 /* now we just get the submatrix */ 1738 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1739 /* clean up */ 1740 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1741 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1758 isend[3] = info->memory; isend[4] = info->mallocs; 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1761 isend[3] += info->memory; isend[4] += info->mallocs; 1762 if (flag == MAT_LOCAL) { 1763 info->nz_used = isend[0]; 1764 info->nz_allocated = isend[1]; 1765 info->nz_unneeded = isend[2]; 1766 info->memory = isend[3]; 1767 info->mallocs = isend[4]; 1768 } else if (flag == MAT_GLOBAL_MAX) { 1769 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1770 info->nz_used = irecv[0]; 1771 info->nz_allocated = irecv[1]; 1772 info->nz_unneeded = irecv[2]; 1773 info->memory = irecv[3]; 1774 info->mallocs = irecv[4]; 1775 } else if (flag == MAT_GLOBAL_SUM) { 1776 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1777 info->nz_used = irecv[0]; 1778 info->nz_allocated = irecv[1]; 1779 info->nz_unneeded = irecv[2]; 1780 info->memory = irecv[3]; 1781 info->mallocs = irecv[4]; 1782 } 1783 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1784 info->fill_ratio_needed = 0; 1785 info->factor_mallocs = 0; 1786 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatSetOption_MPIAIJ" 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_ROW_ORIENTED: 1810 a->roworiented = flg; 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_NEW_DIAGONALS: 1815 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1816 break; 1817 case MAT_IGNORE_OFF_PROC_ENTRIES: 1818 a->donotstash = flg; 1819 break; 1820 case MAT_SPD: 1821 A->spd_set = PETSC_TRUE; 1822 A->spd = flg; 1823 if (flg) { 1824 A->symmetric = PETSC_TRUE; 1825 A->structurally_symmetric = PETSC_TRUE; 1826 A->symmetric_set = PETSC_TRUE; 1827 A->structurally_symmetric_set = PETSC_TRUE; 1828 } 1829 break; 1830 case MAT_SYMMETRIC: 1831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1832 break; 1833 case MAT_STRUCTURALLY_SYMMETRIC: 
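/* as with MAT_SYMMETRIC above, the hint is only forwarded to the sequential diagonal block; the symmetry flags on the parallel matrix itself are recorded by the MatSetOption() front end before this routine is reached */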
1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 case MAT_HERMITIAN: 1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_SYMMETRY_ETERNAL: 1840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1841 break; 1842 default: 1843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatGetRow_MPIAIJ" 1850 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1851 { 1852 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1853 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1854 PetscErrorCode ierr; 1855 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1856 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1857 PetscInt *cmap,*idx_p; 1858 1859 PetscFunctionBegin; 1860 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1861 mat->getrowactive = PETSC_TRUE; 1862 1863 if (!mat->rowvalues && (idx || v)) { 1864 /* 1865 allocate enough space to hold information from the longest row. 1866 */ 1867 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1868 PetscInt max = 1,tmp; 1869 for (i=0; i<matin->rmap->n; i++) { 1870 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1871 if (max < tmp) { max = tmp; } 1872 } 1873 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1874 } 1875 1876 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1877 lrow = row - rstart; 1878 1879 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1880 if (!v) {pvA = 0; pvB = 0;} 1881 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1882 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1883 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1884 nztot = nzA + nzB; 1885 1886 cmap = mat->garray; 1887 if (v || idx) { 1888 if (nztot) { 1889 /* Sort by increasing column numbers, assuming A and B already sorted */ 1890 PetscInt imark = -1; 1891 if (v) { 1892 *v = v_p = mat->rowvalues; 1893 for (i=0; i<nzB; i++) { 1894 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1895 else break; 1896 } 1897 imark = i; 1898 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1899 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1900 } 1901 if (idx) { 1902 *idx = idx_p = mat->rowindices; 1903 if (imark > -1) { 1904 for (i=0; i<imark; i++) { 1905 idx_p[i] = cmap[cworkB[i]]; 1906 } 1907 } else { 1908 for (i=0; i<nzB; i++) { 1909 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1910 else break; 1911 } 1912 imark = i; 1913 } 1914 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1915 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1916 } 1917 } else { 1918 if (idx) *idx = 0; 1919 if (v) *v = 0; 1920 } 1921 } 1922 *nz = nztot; 1923 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1924 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 1934 PetscFunctionBegin; 1935 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1936 
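/*
     A minimal caller-side sketch of the protocol implemented by
     MatGetRow_MPIAIJ()/MatRestoreRow_MPIAIJ() above (names from the public
     API; the row r must be owned by this process):

       PetscInt          ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetRow(mat,r,&ncols,&cols,&vals);CHKERRQ(ierr);
       (read-only use of cols[] and vals[])
       ierr = MatRestoreRow(mat,r,&ncols,&cols,&vals);CHKERRQ(ierr);

     Only one row may be outstanding at a time because the values are copied
     into the single rowvalues/rowindices work pair allocated in
     MatGetRow_MPIAIJ(), hence the getrowactive guard above.
  */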
aij->getrowactive = PETSC_FALSE; 1937 PetscFunctionReturn(0); 1938 } 1939 1940 #undef __FUNCT__ 1941 #define __FUNCT__ "MatNorm_MPIAIJ" 1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1943 { 1944 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1945 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1946 PetscErrorCode ierr; 1947 PetscInt i,j,cstart = mat->cmap->rstart; 1948 PetscReal sum = 0.0; 1949 MatScalar *v; 1950 1951 PetscFunctionBegin; 1952 if (aij->size == 1) { 1953 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1954 } else { 1955 if (type == NORM_FROBENIUS) { 1956 v = amat->a; 1957 for (i=0; i<amat->nz; i++) { 1958 #if defined(PETSC_USE_COMPLEX) 1959 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1960 #else 1961 sum += (*v)*(*v); v++; 1962 #endif 1963 } 1964 v = bmat->a; 1965 for (i=0; i<bmat->nz; i++) { 1966 #if defined(PETSC_USE_COMPLEX) 1967 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1968 #else 1969 sum += (*v)*(*v); v++; 1970 #endif 1971 } 1972 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1973 *norm = PetscSqrtReal(*norm); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1979 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1980 *norm = 0.0; 1981 v = amat->a; jj = amat->j; 1982 for (j=0; j<amat->nz; j++) { 1983 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a; jj = bmat->j; 1986 for (j=0; j<bmat->nz; j++) { 1987 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1988 } 1989 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1990 for (j=0; j<mat->cmap->N; j++) { 1991 if (tmp2[j] > *norm) *norm = tmp2[j]; 1992 } 1993 ierr = PetscFree(tmp);CHKERRQ(ierr); 1994 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2010 } else { 2011 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 } 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatTranspose_MPIAIJ" 2019 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2022 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2023 PetscErrorCode ierr; 2024 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2025 PetscInt cstart=A->cmap->rstart,ncol; 2026 Mat B; 2027 MatScalar *array; 2028 2029 PetscFunctionBegin; 2030 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2031 2032 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2033 ai = Aloc->i; aj = Aloc->j; 2034 bi = Bloc->i; bj = Bloc->j; 2035 if (reuse == 
MAT_INITIAL_MATRIX || *matout == A) { 2036 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2037 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2038 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2039 for (i=0; i<ai[ma]; i++){ 2040 d_nnz[aj[i]] ++; 2041 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2042 } 2043 2044 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2045 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2046 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2047 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2048 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2049 } else { 2050 B = *matout; 2051 } 2052 2053 /* copy over the A part */ 2054 array = Aloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<ma; i++) { 2057 ncol = ai[i+1]-ai[i]; 2058 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2059 row++; array += ncol; aj += ncol; 2060 } 2061 aj = Aloc->j; 2062 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2063 2064 /* copy over the B part */ 2065 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2066 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2067 array = Bloc->a; 2068 row = A->rmap->rstart; 2069 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2070 cols_tmp = cols; 2071 for (i=0; i<mb; i++) { 2072 ncol = bi[i+1]-bi[i]; 2073 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2074 row++; array += ncol; cols_tmp += ncol; 2075 } 2076 ierr = PetscFree(cols);CHKERRQ(ierr); 2077 2078 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2081 *matout = B; 2082 } else { 2083 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2091 { 2092 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2093 Mat a = aij->A,b = aij->B; 2094 PetscErrorCode ierr; 2095 PetscInt s1,s2,s3; 2096 2097 PetscFunctionBegin; 2098 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2099 if (rr) { 2100 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2101 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2102 /* Overlap communication with computation. 
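The forward scatter of rr into aij->lvec is merely started here; the left scaling of the off-diagonal block and the scaling of the diagonal block proceed while the messages are in flight, and the scatter is completed below only when the off-process values are finally needed to right-scale aij->B.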
*/ 2103 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 } 2105 if (ll) { 2106 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2107 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2108 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2109 } 2110 /* scale the diagonal block */ 2111 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2112 2113 if (rr) { 2114 /* Do a scatter end and then right scale the off-diagonal block */ 2115 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2117 } 2118 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2124 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2125 { 2126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2131 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2132 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2133 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2138 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatEqual_MPIAIJ" 2150 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2151 { 2152 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2153 Mat a,b,c,d; 2154 PetscBool flg; 2155 PetscErrorCode ierr; 2156 2157 PetscFunctionBegin; 2158 a = matA->A; b = matA->B; 2159 c = matB->A; d = matB->B; 2160 2161 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2162 if (flg) { 2163 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2164 } 2165 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatCopy_MPIAIJ" 2171 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2172 { 2173 PetscErrorCode ierr; 2174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2175 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2176 2177 PetscFunctionBegin; 2178 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2179 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2180 /* because of the column compression in the off-processor part of the matrix a->B, 2181 the number of columns in a->B and b->B may be different, hence we cannot call 2182 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2183 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2184 then copying the submatrices */ 2185 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2186 } else { 2187 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2188 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2189 } 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2195 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2196 { 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatAXPY_MPIAIJ" 2206 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2207 { 2208 PetscErrorCode ierr; 2209 PetscInt i; 2210 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2211 PetscBLASInt bnz,one=1; 2212 Mat_SeqAIJ *x,*y; 2213 2214 PetscFunctionBegin; 2215 if (str == SAME_NONZERO_PATTERN) { 2216 PetscScalar alpha = a; 2217 x = (Mat_SeqAIJ *)xx->A->data; 2218 y = (Mat_SeqAIJ *)yy->A->data; 2219 bnz = PetscBLASIntCast(x->nz); 2220 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2221 x = (Mat_SeqAIJ *)xx->B->data; 2222 y = (Mat_SeqAIJ *)yy->B->data; 2223 bnz = PetscBLASIntCast(x->nz); 2224 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2225 } else if (str == SUBSET_NONZERO_PATTERN) { 2226 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2227 2228 x = (Mat_SeqAIJ *)xx->B->data; 2229 y = (Mat_SeqAIJ *)yy->B->data; 2230 if (y->xtoy && y->XtoY != xx->B) { 2231 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2232 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2233 } 2234 if (!y->xtoy) { /* get xtoy */ 2235 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2236 y->XtoY = xx->B; 2237 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2238 } 2239 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2240 } else { 2241 Mat B; 2242 PetscInt *nnz_d,*nnz_o; 2243 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2244 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2245 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2246 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2247 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2248 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2250 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2251 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2252 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2253 ierr = MatHeaderReplace(Y,B); 2254 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2255 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 2260 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatConjugate_MPIAIJ" 2264 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2265 { 2266 #if defined(PETSC_USE_COMPLEX) 2267 PetscErrorCode ierr; 2268 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2269 2270 PetscFunctionBegin; 2271 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2272 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2273 #else 2274 PetscFunctionBegin; 2275 #endif 2276 PetscFunctionReturn(0); 
2277 } 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatRealPart_MPIAIJ" 2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2282 { 2283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2288 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2295 { 2296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2301 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #ifdef PETSC_HAVE_PBGL 2306 2307 #include <boost/parallel/mpi/bsp_process_group.hpp> 2308 #include <boost/graph/distributed/ilu_default_graph.hpp> 2309 #include <boost/graph/distributed/ilu_0_block.hpp> 2310 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2311 #include <boost/graph/distributed/petsc/interface.hpp> 2312 #include <boost/multi_array.hpp> 2313 #include <boost/parallel/distributed_property_map.hpp> 2314 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2317 /* 2318 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2319 */ 2320 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2321 { 2322 namespace petsc = boost::distributed::petsc; 2323 2324 namespace graph_dist = boost::graph::distributed; 2325 using boost::graph::distributed::ilu_default::process_group_type; 2326 using boost::graph::ilu_permuted; 2327 2328 PetscBool row_identity, col_identity; 2329 PetscContainer c; 2330 PetscInt m, n, M, N; 2331 PetscErrorCode ierr; 2332 2333 PetscFunctionBegin; 2334 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2335 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2336 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2337 if (!row_identity || !col_identity) { 2338 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2339 } 2340 2341 process_group_type pg; 2342 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2343 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2344 lgraph_type& level_graph = *lgraph_p; 2345 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2346 2347 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2348 ilu_permuted(level_graph); 2349 2350 /* put together the new matrix */ 2351 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2352 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2353 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2354 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2355 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2356 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2357 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2358 2359 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2360 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2361 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2362 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2368 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat
B,Mat A, const MatFactorInfo *info) 2369 { 2370 PetscFunctionBegin; 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatSolve_MPIAIJ" 2376 /* 2377 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2378 */ 2379 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2380 { 2381 namespace graph_dist = boost::graph::distributed; 2382 2383 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2384 lgraph_type* lgraph_p; 2385 PetscContainer c; 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2390 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2391 ierr = VecCopy(b, x);CHKERRQ(ierr); 2392 2393 PetscScalar* array_x; 2394 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2395 PetscInt sx; 2396 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2397 2398 PetscScalar* array_b; 2399 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2400 PetscInt sb; 2401 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2402 2403 lgraph_type& level_graph = *lgraph_p; 2404 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2405 2406 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2407 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2408 ref_x(array_x, boost::extents[num_vertices(graph)]); 2409 2410 typedef boost::iterator_property_map<array_ref_type::iterator, 2411 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2412 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2413 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2414 2415 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2416 ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 #endif 2420 2421 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2422 PetscInt nzlocal,nsends,nrecvs; 2423 PetscMPIInt *send_rank,*recv_rank; 2424 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2425 PetscScalar *sbuf_a,**rbuf_a; 2426 PetscErrorCode (*Destroy)(Mat); 2427 } Mat_Redundant; 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2431 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2432 { 2433 PetscErrorCode ierr; 2434 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2435 PetscInt i; 2436 2437 PetscFunctionBegin; 2438 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2439 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2440 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2441 for (i=0; i<redund->nrecvs; i++){ 2442 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2444 } 2445 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2446 ierr = PetscFree(redund);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatDestroy_MatRedundant" 2452 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2453 { 2454 PetscErrorCode ierr; 2455 PetscContainer container; 2456 Mat_Redundant *redund=PETSC_NULL; 2457 2458 PetscFunctionBegin; 2459 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2460 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2461 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2462 A->ops->destroy = redund->Destroy; 2463 ierr =
PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2464 if (A->ops->destroy) { 2465 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2472 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2473 { 2474 PetscMPIInt rank,size; 2475 MPI_Comm comm=((PetscObject)mat)->comm; 2476 PetscErrorCode ierr; 2477 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2478 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2479 PetscInt *rowrange=mat->rmap->range; 2480 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2481 Mat A=aij->A,B=aij->B,C=*matredundant; 2482 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2483 PetscScalar *sbuf_a; 2484 PetscInt nzlocal=a->nz+b->nz; 2485 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2486 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2487 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2488 MatScalar *aworkA,*aworkB; 2489 PetscScalar *vals; 2490 PetscMPIInt tag1,tag2,tag3,imdex; 2491 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2492 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2493 MPI_Status recv_status,*send_status; 2494 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2495 PetscInt **rbuf_j=PETSC_NULL; 2496 PetscScalar **rbuf_a=PETSC_NULL; 2497 Mat_Redundant *redund=PETSC_NULL; 2498 PetscContainer container; 2499 2500 PetscFunctionBegin; 2501 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2502 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2503 2504 if (reuse == MAT_REUSE_MATRIX) { 2505 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2506 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2507 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2508 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2509 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2510 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2511 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2512 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong nzlocal"); 2513 2514 nsends = redund->nsends; 2515 nrecvs = redund->nrecvs; 2516 send_rank = redund->send_rank; 2517 recv_rank = redund->recv_rank; 2518 sbuf_nz = redund->sbuf_nz; 2519 rbuf_nz = redund->rbuf_nz; 2520 sbuf_j = redund->sbuf_j; 2521 sbuf_a = redund->sbuf_a; 2522 rbuf_j = redund->rbuf_j; 2523 rbuf_a = redund->rbuf_a; 2524 } 2525 2526 if (reuse == MAT_INITIAL_MATRIX){ 2527 PetscMPIInt subrank,subsize; 2528 PetscInt nleftover,np_subcomm; 2529 /* get the destination processors' id send_rank, nsends and nrecvs */ 2530 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2531 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2532 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2533 np_subcomm = size/nsubcomm; 2534 nleftover = size - nsubcomm*np_subcomm; 2535 nsends = 0; nrecvs = 0; 2536 for (i=0; i<size; i++){ /* i=rank*/ 2537 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2538 send_rank[nsends] = i; nsends++; 2539 recv_rank[nrecvs++] = i; 2540 } 2541 } 2542 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2543 i = size-nleftover-1; 2544 j = 0; 2545 while (j < nsubcomm - nleftover){ 2546 send_rank[nsends++] = i; 2547 i--; j++; 2548 } 2549 } 2550 2551 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2552 for (i=0; i<nleftover; i++){ 2553 recv_rank[nrecvs++] = size-nleftover+i; 2554 } 2555 } 2556 2557 /* allocate sbuf_j, sbuf_a */ 2558 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2559 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2560 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2561 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2562 2563 /* copy mat's local entries into the buffers */ 2564 if (reuse == MAT_INITIAL_MATRIX){ 2565 rownz_max = 0; 2566 rptr = sbuf_j; 2567 cols = sbuf_j + rend-rstart + 1; 2568 vals = sbuf_a; 2569 rptr[0] = 0; 2570 for (i=0; i<rend-rstart; i++){ 2571 row = i + rstart; 2572 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2573 ncols = nzA + nzB; 2574 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2575 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2576 /* load the column indices for this row into cols */ 2577 lwrite = 0; 2578 for (l=0; l<nzB; l++) { 2579 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2580 vals[lwrite] = aworkB[l]; 2581 cols[lwrite++] = ctmp; 2582 } 2583 } 2584 for (l=0; l<nzA; l++){ 2585 vals[lwrite] = aworkA[l]; 2586 cols[lwrite++] = cstart + cworkA[l]; 2587 } 2588 for (l=0; l<nzB; l++) { 2589 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2590 vals[lwrite] = aworkB[l]; 2591 cols[lwrite++] = ctmp; 2592 } 2593 } 2594 vals += ncols; 2595 cols += ncols; 2596 rptr[i+1] = rptr[i] + ncols; 2597 if (rownz_max < ncols) rownz_max = ncols; 2598 } 2599 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2600 } else { /* only copy matrix values into sbuf_a */ 2601 rptr = sbuf_j; 2602 vals = sbuf_a; 2603 rptr[0] = 0; 2604 for (i=0; i<rend-rstart; i++){ 2605 row = i + rstart; 2606 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2607 ncols = nzA + nzB; 2608 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2609 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2610 lwrite = 0; 2611 for (l=0; l<nzB; l++) { 2612 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2613 } 2614 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2615 for 
(l=0; l<nzB; l++) { 2616 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2617 } 2618 vals += ncols; 2619 rptr[i+1] = rptr[i] + ncols; 2620 } 2621 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2622 2623 /* send nzlocal to others, and receive others' nzlocal */ 2624 /*--------------------------------------------------*/ 2625 if (reuse == MAT_INITIAL_MATRIX){ 2626 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2627 s_waits2 = s_waits3 + nsends; 2628 s_waits1 = s_waits2 + nsends; 2629 r_waits1 = s_waits1 + nsends; 2630 r_waits2 = r_waits1 + nrecvs; 2631 r_waits3 = r_waits2 + nrecvs; 2632 } else { 2633 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2634 r_waits3 = s_waits3 + nsends; 2635 } 2636 2637 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2638 if (reuse == MAT_INITIAL_MATRIX){ 2639 /* get new tags to keep the communication clean */ 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2641 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2642 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2643 2644 /* post receives of others' nzlocal */ 2645 for (i=0; i<nrecvs; i++){ 2646 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2647 } 2648 /* send nzlocal to others */ 2649 for (i=0; i<nsends; i++){ 2650 sbuf_nz[i] = nzlocal; 2651 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2652 } 2653 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2654 count = nrecvs; 2655 while (count) { 2656 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2657 recv_rank[imdex] = recv_status.MPI_SOURCE; 2658 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2659 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2660 2661 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2662 rbuf_nz[imdex] += i + 2; 2663 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2664 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2665 count--; 2666 } 2667 /* wait on sends of nzlocal */ 2668 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2669 /* send mat->i,j to others, and receive from others */ 2670 /*------------------------------------------------*/ 2671 for (i=0; i<nsends; i++){ 2672 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2673 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2674 } 2675 /* wait on receives of mat->i,j */ 2676 /*------------------------------*/ 2677 count = nrecvs; 2678 while (count) { 2679 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2680 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2681 count--; 2682 } 2683 /* wait on sends of mat->i,j */ 2684 /*---------------------------*/ 2685 if (nsends) { 2686 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2687 } 2688 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2689 2690 /* post receives, send and receive mat->a */ 2691
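/*
     The same nonblocking pattern as above: post all MPI_Irecv's first, then
     the MPI_Isend's, then drain completions with MPI_Waitany(). A generic
     sketch of the drain loop used throughout this routine (imdex and
     recv_status as declared above):

       count = nrecvs;
       while (count) {
         ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
         (process receive buffer imdex)
         count--;
       }
  */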
/*----------------------------------------*/ 2692 for (imdex=0; imdex<nrecvs; imdex++) { 2693 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2694 } 2695 for (i=0; i<nsends; i++){ 2696 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2697 } 2698 count = nrecvs; 2699 while (count) { 2700 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2701 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2702 count--; 2703 } 2704 if (nsends) { 2705 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2706 } 2707 2708 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2709 2710 /* create redundant matrix */ 2711 /*-------------------------*/ 2712 if (reuse == MAT_INITIAL_MATRIX){ 2713 /* compute rownz_max for preallocation */ 2714 for (imdex=0; imdex<nrecvs; imdex++){ 2715 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2716 rptr = rbuf_j[imdex]; 2717 for (i=0; i<j; i++){ 2718 ncols = rptr[i+1] - rptr[i]; 2719 if (rownz_max < ncols) rownz_max = ncols; 2720 } 2721 } 2722 2723 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2724 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2725 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2726 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2727 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2728 } else { 2729 C = *matredundant; 2730 } 2731 2732 /* insert local matrix entries */ 2733 rptr = sbuf_j; 2734 cols = sbuf_j + rend-rstart + 1; 2735 vals = sbuf_a; 2736 for (i=0; i<rend-rstart; i++){ 2737 row = i + rstart; 2738 ncols = rptr[i+1] - rptr[i]; 2739 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2740 vals += ncols; 2741 cols += ncols; 2742 } 2743 /* insert received matrix entries */ 2744 for (imdex=0; imdex<nrecvs; imdex++){ 2745 rstart = rowrange[recv_rank[imdex]]; 2746 rend = rowrange[recv_rank[imdex]+1]; 2747 rptr = rbuf_j[imdex]; 2748 cols = rbuf_j[imdex] + rend-rstart + 1; 2749 vals = rbuf_a[imdex]; 2750 for (i=0; i<rend-rstart; i++){ 2751 row = i + rstart; 2752 ncols = rptr[i+1] - rptr[i]; 2753 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2754 vals += ncols; 2755 cols += ncols; 2756 } 2757 } 2758 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2761 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2762 if (reuse == MAT_INITIAL_MATRIX) { 2763 PetscContainer container; 2764 *matredundant = C; 2765 /* create a supporting struct and attach it to C for reuse */ 2766 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2767 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2768 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2769 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2770 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2771 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2772 2773 redund->nzlocal = nzlocal; 2774 redund->nsends = nsends; 2775 redund->nrecvs = nrecvs; 2776 
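/* the communication layout and the send/receive buffers are stashed in the container so that a later MAT_REUSE_MATRIX call can skip the symbolic setup and resend only the numerical values */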
redund->send_rank = send_rank; 2777 redund->recv_rank = recv_rank; 2778 redund->sbuf_nz = sbuf_nz; 2779 redund->rbuf_nz = rbuf_nz; 2780 redund->sbuf_j = sbuf_j; 2781 redund->sbuf_a = sbuf_a; 2782 redund->rbuf_j = rbuf_j; 2783 redund->rbuf_a = rbuf_a; 2784 2785 redund->Destroy = C->ops->destroy; 2786 C->ops->destroy = MatDestroy_MatRedundant; 2787 } 2788 PetscFunctionReturn(0); 2789 } 2790 2791 #undef __FUNCT__ 2792 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2793 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2794 { 2795 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2796 PetscErrorCode ierr; 2797 PetscInt i,*idxb = 0; 2798 PetscScalar *va,*vb; 2799 Vec vtmp; 2800 2801 PetscFunctionBegin; 2802 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2803 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2804 if (idx) { 2805 for (i=0; i<A->rmap->n; i++) { 2806 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2807 } 2808 } 2809 2810 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2811 if (idx) { 2812 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2813 } 2814 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2815 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2816 2817 for (i=0; i<A->rmap->n; i++){ 2818 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2819 va[i] = vb[i]; 2820 if (idx) idx[i] = a->garray[idxb[i]]; 2821 } 2822 } 2823 2824 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2825 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2826 ierr = PetscFree(idxb);CHKERRQ(ierr); 2827 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2833 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2834 { 2835 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2836 PetscErrorCode ierr; 2837 PetscInt i,*idxb = 0; 2838 PetscScalar *va,*vb; 2839 Vec vtmp; 2840 2841 PetscFunctionBegin; 2842 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2843 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2844 if (idx) { 2845 for (i=0; i<A->rmap->n; i++) { 2846 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2847 } 2848 } 2849 2850 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2851 if (idx) { 2852 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2853 } 2854 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2855 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2856 2857 for (i=0; i<A->rmap->n; i++){ 2858 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2859 va[i] = vb[i]; 2860 if (idx) idx[i] = a->garray[idxb[i]]; 2861 } 2862 } 2863 2864 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2865 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2866 ierr = PetscFree(idxb);CHKERRQ(ierr); 2867 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2868 PetscFunctionReturn(0); 2869 } 2870 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2873 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2874 { 2875 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2876 PetscInt n = A->rmap->n; 2877 PetscInt cstart = A->cmap->rstart; 2878 PetscInt *cmap = mat->garray; 2879 PetscInt *diagIdx, *offdiagIdx; 2880 Vec diagV, offdiagV; 2881 PetscScalar *a, *diagA, *offdiagA; 2882 PetscInt r; 2883 PetscErrorCode ierr; 2884 2885 PetscFunctionBegin; 2886 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2887 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2888 ierr = VecCreateSeq(PETSC_COMM_SELF, n,
&offdiagV);CHKERRQ(ierr); 2889 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2890 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2891 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2892 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2893 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2894 for(r = 0; r < n; ++r) { 2895 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2896 a[r] = diagA[r]; 2897 idx[r] = cstart + diagIdx[r]; 2898 } else { 2899 a[r] = offdiagA[r]; 2900 idx[r] = cmap[offdiagIdx[r]]; 2901 } 2902 } 2903 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2905 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2906 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2907 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2908 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2909 PetscFunctionReturn(0); 2910 } 2911 2912 #undef __FUNCT__ 2913 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2914 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2915 { 2916 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2917 PetscInt n = A->rmap->n; 2918 PetscInt cstart = A->cmap->rstart; 2919 PetscInt *cmap = mat->garray; 2920 PetscInt *diagIdx, *offdiagIdx; 2921 Vec diagV, offdiagV; 2922 PetscScalar *a, *diagA, *offdiagA; 2923 PetscInt r; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2928 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2929 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2930 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2931 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2932 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2933 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2934 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2935 for(r = 0; r < n; ++r) { 2936 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2937 a[r] = diagA[r]; 2938 idx[r] = cstart + diagIdx[r]; 2939 } else { 2940 a[r] = offdiagA[r]; 2941 idx[r] = cmap[offdiagIdx[r]]; 2942 } 2943 } 2944 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2945 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2946 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2947 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2948 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2949 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2950 PetscFunctionReturn(0); 2951 } 2952 2953 #undef __FUNCT__ 2954 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ" 2955 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat) 2956 { 2957 PetscErrorCode ierr; 2958 Mat *dummy; 2959 2960 PetscFunctionBegin; 2961 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2962 *newmat = *dummy; 2963 ierr = PetscFree(dummy);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2971 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2972 { 2973 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2974 PetscErrorCode ierr; 2975 2976 PetscFunctionBegin; 2977 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2978 PetscFunctionReturn(0); 2979 } 2980 2981 2982 /* -------------------------------------------------------------------*/ 2983 static
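/*
   Dispatch table for MATMPIAIJ: the position of each entry must match the
   MatOperation numbering in petscmat.h (tracked by the numbered comments
   below); a 0 entry means the operation is not supported for this type.
*/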
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2984 MatGetRow_MPIAIJ, 2985 MatRestoreRow_MPIAIJ, 2986 MatMult_MPIAIJ, 2987 /* 4*/ MatMultAdd_MPIAIJ, 2988 MatMultTranspose_MPIAIJ, 2989 MatMultTransposeAdd_MPIAIJ, 2990 #ifdef PETSC_HAVE_PBGL 2991 MatSolve_MPIAIJ, 2992 #else 2993 0, 2994 #endif 2995 0, 2996 0, 2997 /*10*/ 0, 2998 0, 2999 0, 3000 MatSOR_MPIAIJ, 3001 MatTranspose_MPIAIJ, 3002 /*15*/ MatGetInfo_MPIAIJ, 3003 MatEqual_MPIAIJ, 3004 MatGetDiagonal_MPIAIJ, 3005 MatDiagonalScale_MPIAIJ, 3006 MatNorm_MPIAIJ, 3007 /*20*/ MatAssemblyBegin_MPIAIJ, 3008 MatAssemblyEnd_MPIAIJ, 3009 MatSetOption_MPIAIJ, 3010 MatZeroEntries_MPIAIJ, 3011 /*24*/ MatZeroRows_MPIAIJ, 3012 0, 3013 #ifdef PETSC_HAVE_PBGL 3014 0, 3015 #else 3016 0, 3017 #endif 3018 0, 3019 0, 3020 /*29*/ MatSetUpPreallocation_MPIAIJ, 3021 #ifdef PETSC_HAVE_PBGL 3022 0, 3023 #else 3024 0, 3025 #endif 3026 0, 3027 0, 3028 0, 3029 /*34*/ MatDuplicate_MPIAIJ, 3030 0, 3031 0, 3032 0, 3033 0, 3034 /*39*/ MatAXPY_MPIAIJ, 3035 MatGetSubMatrices_MPIAIJ, 3036 MatIncreaseOverlap_MPIAIJ, 3037 MatGetValues_MPIAIJ, 3038 MatCopy_MPIAIJ, 3039 /*44*/ MatGetRowMax_MPIAIJ, 3040 MatScale_MPIAIJ, 3041 0, 3042 0, 3043 MatZeroRowsColumns_MPIAIJ, 3044 /*49*/ MatSetBlockSize_MPIAIJ, 3045 0, 3046 0, 3047 0, 3048 0, 3049 /*54*/ MatFDColoringCreate_MPIAIJ, 3050 0, 3051 MatSetUnfactored_MPIAIJ, 3052 MatPermute_MPIAIJ, 3053 0, 3054 /*59*/ MatGetSubMatrix_MPIAIJ, 3055 MatDestroy_MPIAIJ, 3056 MatView_MPIAIJ, 3057 0, 3058 0, 3059 /*64*/ 0, 3060 0, 3061 0, 3062 0, 3063 0, 3064 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3065 MatGetRowMinAbs_MPIAIJ, 3066 0, 3067 MatSetColoring_MPIAIJ, 3068 #if defined(PETSC_HAVE_ADIC) 3069 MatSetValuesAdic_MPIAIJ, 3070 #else 3071 0, 3072 #endif 3073 MatSetValuesAdifor_MPIAIJ, 3074 /*75*/ MatFDColoringApply_AIJ, 3075 0, 3076 0, 3077 0, 3078 0, 3079 /*80*/ 0, 3080 0, 3081 0, 3082 /*83*/ MatLoad_MPIAIJ, 3083 0, 3084 0, 3085 0, 3086 0, 3087 0, 3088 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3089 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3090 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3091 MatPtAP_Basic, 3092 MatPtAPSymbolic_MPIAIJ, 3093 /*94*/ MatPtAPNumeric_MPIAIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 /*99*/ 0, 3099 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3100 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3101 MatConjugate_MPIAIJ, 3102 0, 3103 /*104*/MatSetValuesRow_MPIAIJ, 3104 MatRealPart_MPIAIJ, 3105 MatImaginaryPart_MPIAIJ, 3106 0, 3107 0, 3108 /*109*/0, 3109 MatGetRedundantMatrix_MPIAIJ, 3110 MatGetRowMin_MPIAIJ, 3111 0, 3112 0, 3113 /*114*/MatGetSeqNonzerostructure_MPIAIJ, 3114 0, 3115 0, 3116 0, 3117 0, 3118 /*119*/0, 3119 0, 3120 0, 3121 0, 3122 MatGetMultiProcBlock_MPIAIJ, 3123 /*124*/MatFindNonZeroRows_MPIAIJ, 3124 MatGetColumnNorms_MPIAIJ, 3125 MatInvertBlockDiagonal_MPIAIJ, 3126 0, 3127 MatGetSubMatricesParallel_MPIAIJ 3128 }; 3129 3130 /* ----------------------------------------------------------------------------------------*/ 3131 3132 EXTERN_C_BEGIN 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3135 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3136 { 3137 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3138 PetscErrorCode ierr; 3139 3140 PetscFunctionBegin; 3141 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3142 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3143 PetscFunctionReturn(0); 3144 } 3145 EXTERN_C_END 3146 3147 EXTERN_C_BEGIN 3148 #undef __FUNCT__ 3149 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3150 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3151 { 3152 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3153 PetscErrorCode ierr; 3154 3155 
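/* like MatStoreValues_MPIAIJ() above, retrieval simply forwards to the diagonal (A) and off-diagonal (B) sequential blocks */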
PetscFunctionBegin; 3156 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3157 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3158 PetscFunctionReturn(0); 3159 } 3160 EXTERN_C_END 3161 3162 EXTERN_C_BEGIN 3163 #undef __FUNCT__ 3164 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3165 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3166 { 3167 Mat_MPIAIJ *b; 3168 PetscErrorCode ierr; 3169 PetscInt i; 3170 3171 PetscFunctionBegin; 3172 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3173 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3174 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3175 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3176 3177 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3178 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3179 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3180 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3181 if (d_nnz) { 3182 for (i=0; i<B->rmap->n; i++) { 3183 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3184 } 3185 } 3186 if (o_nnz) { 3187 for (i=0; i<B->rmap->n; i++) { 3188 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3189 } 3190 } 3191 b = (Mat_MPIAIJ*)B->data; 3192 3193 if (!B->preallocated) { 3194 /* Explicitly create 2 MATSEQAIJ matrices. */ 3195 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3196 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3197 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3198 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3199 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3200 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3201 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3202 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3203 } 3204 3205 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3206 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3207 B->preallocated = PETSC_TRUE; 3208 PetscFunctionReturn(0); 3209 } 3210 EXTERN_C_END 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3214 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3215 { 3216 Mat mat; 3217 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3218 PetscErrorCode ierr; 3219 3220 PetscFunctionBegin; 3221 *newmat = 0; 3222 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3223 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3224 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3225 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3226 a = (Mat_MPIAIJ*)mat->data; 3227 3228 mat->factortype = matin->factortype; 3229 mat->rmap->bs = matin->rmap->bs; 3230 mat->assembled = PETSC_TRUE; 3231 mat->insertmode = NOT_SET_VALUES; 3232 mat->preallocated = PETSC_TRUE; 3233 3234 a->size = oldmat->size; 3235 a->rank = oldmat->rank; 3236 a->donotstash = oldmat->donotstash; 3237 a->roworiented = oldmat->roworiented; 3238 a->rowindices = 0; 3239 a->rowvalues = 0; 3240 a->getrowactive = PETSC_FALSE; 3241 3242 ierr = 
PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3243 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3244 3245 if (oldmat->colmap) { 3246 #if defined (PETSC_USE_CTABLE) 3247 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3248 #else 3249 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3250 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3251 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3252 #endif 3253 } else a->colmap = 0; 3254 if (oldmat->garray) { 3255 PetscInt len; 3256 len = oldmat->B->cmap->n; 3257 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3258 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3259 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3260 } else a->garray = 0; 3261 3262 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3263 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3264 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3265 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3266 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3268 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3269 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3270 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3271 *newmat = mat; 3272 PetscFunctionReturn(0); 3273 } 3274 3275 3276 3277 #undef __FUNCT__ 3278 #define __FUNCT__ "MatLoad_MPIAIJ" 3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3280 { 3281 PetscScalar *vals,*svals; 3282 MPI_Comm comm = ((PetscObject)viewer)->comm; 3283 PetscErrorCode ierr; 3284 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3285 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3286 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3287 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3288 PetscInt cend,cstart,n,*rowners,sizesset=1; 3289 int fd; 3290 3291 PetscFunctionBegin; 3292 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3293 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3294 if (!rank) { 3295 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3296 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3297 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3298 } 3299 3300 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3301 3302 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3303 M = header[1]; N = header[2]; 3304 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3305 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3306 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3307 3308 /* If global sizes are set, check if they are consistent with that given in the file */ 3309 if (sizesset) { 3310 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3311 } 3312 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3313 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of 
cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3314
3315 /* determine ownership of all rows */
3316 if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317 else m = newMat->rmap->n; /* Set by user */
3318
3319 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3320 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3321
3322 /* First process needs enough room for process with most rows */
3323 if (!rank) {
3324 mmax = rowners[1];
3325 for (i=2; i<=size; i++) { /* rowners[1..size] hold the local sizes, so rowners[size] must be included */
3326 mmax = PetscMax(mmax,rowners[i]);
3327 }
3328 } else mmax = m;
3329
3330 rowners[0] = 0;
3331 for (i=2; i<=size; i++) {
3332 rowners[i] += rowners[i-1];
3333 }
3334 rstart = rowners[rank];
3335 rend = rowners[rank+1];
3336
3337 /* distribute row lengths to all processors */
3338 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3339 if (!rank) {
3340 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3341 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* staging buffer for the other processes' row lengths, hence mmax not m */
3342 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3343 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3344 for (j=0; j<m; j++) {
3345 procsnz[0] += ourlens[j];
3346 }
3347 for (i=1; i<size; i++) {
3348 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3349 /* calculate the number of nonzeros on each processor */
3350 for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351 procsnz[i] += rowlengths[j];
3352 }
3353 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3354 }
3355 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3356 } else {
3357 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3358 }
3359
3360 if (!rank) {
3361 /* determine max buffer needed and allocate it */
3362 maxnz = 0;
3363 for (i=0; i<size; i++) {
3364 maxnz = PetscMax(maxnz,procsnz[i]);
3365 }
3366 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3367
3368 /* read in my part of the matrix column indices */
3369 nz = procsnz[0];
3370 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3371 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3372
3373 /* read in everyone else's part and ship it off */
3374 for (i=1; i<size; i++) {
3375 nz = procsnz[i];
3376 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3377 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3378 }
3379 ierr = PetscFree(cols);CHKERRQ(ierr);
3380 } else {
3381 /* determine buffer space needed for message */
3382 nz = 0;
3383 for (i=0; i<m; i++) {
3384 nz += ourlens[i];
3385 }
3386 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3387
3388 /* receive message of column indices */
3389 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3390 }
3391
3392 /* determine column ownership if matrix is not square */
3393 if (N != M) {
3394 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3395 else n = newMat->cmap->n;
3396 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3397 cstart = cend - n;
3398 } else {
3399 cstart = rstart;
3400 cend = rend;
3401 n = cend - cstart;
3402 }
3403
3404 /* loop over local rows, determining number of off diagonal entries */
3405 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3406 jj = 0;
3407 for (i=0; i<m; i++) {
3408 for (j=0; j<ourlens[i]; j++) {
3409 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410 jj++;
3411 }
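/* offlens[i] now counts the entries of local row i that fall outside the diagonal column range [cstart,cend) */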
3412 } 3413 3414 for (i=0; i<m; i++) { 3415 ourlens[i] -= offlens[i]; 3416 } 3417 if (!sizesset) { 3418 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3419 } 3420 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3421 3422 for (i=0; i<m; i++) { 3423 ourlens[i] += offlens[i]; 3424 } 3425 3426 if (!rank) { 3427 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3428 3429 /* read in my part of the matrix numerical values */ 3430 nz = procsnz[0]; 3431 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3432 3433 /* insert into matrix */ 3434 jj = rstart; 3435 smycols = mycols; 3436 svals = vals; 3437 for (i=0; i<m; i++) { 3438 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3439 smycols += ourlens[i]; 3440 svals += ourlens[i]; 3441 jj++; 3442 } 3443 3444 /* read in other processors and ship out */ 3445 for (i=1; i<size; i++) { 3446 nz = procsnz[i]; 3447 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3448 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3449 } 3450 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3451 } else { 3452 /* receive numeric values */ 3453 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3454 3455 /* receive message of values*/ 3456 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3457 3458 /* insert into matrix */ 3459 jj = rstart; 3460 smycols = mycols; 3461 svals = vals; 3462 for (i=0; i<m; i++) { 3463 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3464 smycols += ourlens[i]; 3465 svals += ourlens[i]; 3466 jj++; 3467 } 3468 } 3469 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3470 ierr = PetscFree(vals);CHKERRQ(ierr); 3471 ierr = PetscFree(mycols);CHKERRQ(ierr); 3472 ierr = PetscFree(rowners);CHKERRQ(ierr); 3473 3474 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3475 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3476 PetscFunctionReturn(0); 3477 } 3478 3479 #undef __FUNCT__ 3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3482 { 3483 PetscErrorCode ierr; 3484 IS iscol_local; 3485 PetscInt csize; 3486 3487 PetscFunctionBegin; 3488 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3489 if (call == MAT_REUSE_MATRIX) { 3490 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3491 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3492 } else { 3493 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3494 } 3495 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3496 if (call == MAT_INITIAL_MATRIX) { 3497 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3498 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3499 } 3500 PetscFunctionReturn(0); 3501 } 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3505 /* 3506 Not great since it makes two copies of the submatrix, first an SeqAIJ 3507 in local and then by concatenating the local matrices the end result. 3508 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3509 3510 Note: This requires a sequential iscol with all indices. 
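   A typical calling sequence seen from the user side is (a sketch; creation of the
   row/column index sets is elided):
.vb
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ... change the numerical values of mat, keeping its nonzero pattern ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
.ve
   The MAT_REUSE_MATRIX path depends on the "SubMatrix" and "ISAllGather" objects
   that the MAT_INITIAL_MATRIX call composes with the submatrix.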
3511 */ 3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3513 { 3514 PetscErrorCode ierr; 3515 PetscMPIInt rank,size; 3516 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3517 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3518 Mat *local,M,Mreuse; 3519 MatScalar *vwork,*aa; 3520 MPI_Comm comm = ((PetscObject)mat)->comm; 3521 Mat_SeqAIJ *aij; 3522 3523 3524 PetscFunctionBegin; 3525 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3527 3528 if (call == MAT_REUSE_MATRIX) { 3529 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3530 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3531 local = &Mreuse; 3532 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3533 } else { 3534 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3535 Mreuse = *local; 3536 ierr = PetscFree(local);CHKERRQ(ierr); 3537 } 3538 3539 /* 3540 m - number of local rows 3541 n - number of columns (same on all processors) 3542 rstart - first row in new global matrix generated 3543 */ 3544 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3545 if (call == MAT_INITIAL_MATRIX) { 3546 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3547 ii = aij->i; 3548 jj = aij->j; 3549 3550 /* 3551 Determine the number of non-zeros in the diagonal and off-diagonal 3552 portions of the matrix in order to do correct preallocation 3553 */ 3554 3555 /* first get start and end of "diagonal" columns */ 3556 if (csize == PETSC_DECIDE) { 3557 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3558 if (mglobal == n) { /* square matrix */ 3559 nlocal = m; 3560 } else { 3561 nlocal = n/size + ((n % size) > rank); 3562 } 3563 } else { 3564 nlocal = csize; 3565 } 3566 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3567 rstart = rend - nlocal; 3568 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3569 3570 /* next, compute all the lengths */ 3571 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3572 olens = dlens + m; 3573 for (i=0; i<m; i++) { 3574 jend = ii[i+1] - ii[i]; 3575 olen = 0; 3576 dlen = 0; 3577 for (j=0; j<jend; j++) { 3578 if (*jj < rstart || *jj >= rend) olen++; 3579 else dlen++; 3580 jj++; 3581 } 3582 olens[i] = olen; 3583 dlens[i] = dlen; 3584 } 3585 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3586 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3587 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3588 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3589 ierr = PetscFree(dlens);CHKERRQ(ierr); 3590 } else { 3591 PetscInt ml,nl; 3592 3593 M = *newmat; 3594 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3595 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3596 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3597 /* 3598 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3599 rather than the slower MatSetValues(). 
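   Marking the matrix as previously assembled (but currently unassembled) makes
   MatSetValues_MPIAIJ() translate off-diagonal column indices through the column
   map built during the first assembly, instead of treating the insertions as a
   brand-new assembly.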
3600 */
3601 M->was_assembled = PETSC_TRUE;
3602 M->assembled = PETSC_FALSE;
3603 }
3604 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3605 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606 ii = aij->i;
3607 jj = aij->j;
3608 aa = aij->a;
3609 for (i=0; i<m; i++) {
3610 row = rstart + i;
3611 nz = ii[i+1] - ii[i];
3612 cwork = jj; jj += nz;
3613 vwork = aa; aa += nz;
3614 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3615 }
3616
3617 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619 *newmat = M;
3620
3621 /* save submatrix used in processor for next request */
3622 if (call == MAT_INITIAL_MATRIX) {
3623 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3624 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3625 }
3626
3627 PetscFunctionReturn(0);
3628 }
3629
3630 EXTERN_C_BEGIN
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3633 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634 {
3635 PetscInt m,cstart, cend,j,nnz,i,d;
3636 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637 const PetscInt *JJ;
3638 PetscScalar *values;
3639 PetscErrorCode ierr;
3640
3641 PetscFunctionBegin;
3642 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3643
3644 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3645 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3646 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3647 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3648 m = B->rmap->n;
3649 cstart = B->cmap->rstart;
3650 cend = B->cmap->rend;
3651 rstart = B->rmap->rstart;
3652
3653 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3654
3655 #if defined(PETSC_USE_DEBUG)
3656 for (i=0; i<m; i++) {
3657 nnz = Ii[i+1] - Ii[i];
3658 JJ = J + Ii[i];
3659 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3660 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3661 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662 }
3663 #endif
3664
3665 for (i=0; i<m; i++) {
3666 nnz = Ii[i+1] - Ii[i];
3667 JJ = J + Ii[i];
3668 nnz_max = PetscMax(nnz_max,nnz);
3669 d = 0;
3670 for (j=0; j<nnz; j++) {
3671 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672 }
3673 d_nnz[i] = d;
3674 o_nnz[i] = nnz - d;
3675 }
3676 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3677 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3678
3679 if (v) values = (PetscScalar*)v;
3680 else {
3681 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3682 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3683 }
3684
3685 for (i=0; i<m; i++) {
3686 ii = i + rstart;
3687 nnz = Ii[i+1] - Ii[i];
3688 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3689 }
3690 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692
3693 if (!v) {
3694 ierr = PetscFree(values);CHKERRQ(ierr);
3695 }
3696 PetscFunctionReturn(0);
3697 }
3698 EXTERN_C_END
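/*
   Usage sketch (illustrative only): filling the 3x3 example from the manual page
   below on two processes. Here nlocal is a hypothetical variable holding 2 on
   process 0 and 1 on process 1, and v may be PETSC_NULL if only the nonzero
   structure is being set:

.vb
      on process 0:                        on process 1:
      PetscInt    i[] = {0,1,3};           PetscInt    i[] = {0,3};
      PetscInt    j[] = {0,0,2};           PetscInt    j[] = {0,1,2};
      PetscScalar v[] = {1,2,3};           PetscScalar v[] = {4,5,6};

      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,nlocal,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve

   As the implementation above shows, this both preallocates and inserts the
   values, leaving B assembled.
*/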
3699
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3702 /*@
3703 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704 (the default parallel PETSc format).
3705
3706 Collective on MPI_Comm
3707
3708 Input Parameters:
3709 + B - the matrix
3710 . i - the indices into j for the start of each local row (starts with zero)
3711 . j - the column indices for each local row (starts with zero)
3712 - v - optional values in the matrix
3713
3714 Level: developer
3715
3716 Notes:
3717 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3718 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3719 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3720
3721 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3722
3723 The format used for the sparse matrix input is equivalent to a
3724 row-major ordering, i.e., for the following matrix, the input data expected is
3725 as shown:
3726
3727 1 0 0
3728 2 0 3 P0
3729 -------
3730 4 5 6 P1
3731
3732 Process0 [P0]: rows_owned=[0,1]
3733 i = {0,1,3} [size = nrow+1 = 2+1]
3734 j = {0,0,2} [size = nz = 3]
3735 v = {1,2,3} [size = nz = 3]
3736
3737 Process1 [P1]: rows_owned=[2]
3738 i = {0,3} [size = nrow+1 = 1+1]
3739 j = {0,1,2} [size = nz = 3]
3740 v = {4,5,6} [size = nz = 3]
3741
3742 .keywords: matrix, aij, compressed row, sparse, parallel
3743
3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746 @*/
3747 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748 {
3749 PetscErrorCode ierr;
3750
3751 PetscFunctionBegin;
3752 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3753 PetscFunctionReturn(0);
3754 }
3755
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3758 /*@C
3759 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760 (the default parallel PETSc format). For good matrix assembly performance
3761 the user should preallocate the matrix storage by setting the parameters
3762 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3763 performance can be increased by more than a factor of 50.
3764
3765 Collective on MPI_Comm
3766
3767 Input Parameters:
3768 + A - the matrix
3769 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3770 (same value is used for all local rows)
3771 . d_nnz - array containing the number of nonzeros in the various rows of the
3772 DIAGONAL portion of the local submatrix (possibly different for each row)
3773 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774 The size of this array is equal to the number of local rows, i.e., 'm'.
3775 You must leave room for the diagonal entry even if it is zero.
3776 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777 submatrix (same value is used for all local rows).
3778 - o_nnz - array containing the number of nonzeros in the various rows of the
3779 OFF-DIAGONAL portion of the local submatrix (possibly different for
3780 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781 structure. The size of this array is equal to the number
3782 of local rows, i.e., 'm'.
3783
3784 If the *_nnz parameter is given then the *_nz parameter is ignored.
3785
3786 The AIJ format (also called the Yale sparse matrix format or
3787 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3788 storage. The stored row and column indices begin with zero.
3789 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3790
3791 The parallel matrix is partitioned such that the first m0 rows belong to
3792 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3794
3795 The DIAGONAL portion of the local submatrix of a processor can be defined
3796 as the submatrix which is obtained by extracting the part corresponding to
3797 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798 first row that belongs to the processor, r2 is the last row belonging to
3799 this processor, and c1-c2 is the range of indices of the local part of a
3800 vector suitable for applying the matrix to. This is an mxn matrix. In the
3801 common case of a square matrix, the row and column ranges are the same and
3802 the DIAGONAL part is also square. The remaining portion of the local
3803 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3804
3805 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3806
3807 You can call MatGetInfo() to get information on how effective the preallocation was;
3808 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3809 You can also run with the option -info and look for messages with the string
3810 malloc in them to see if additional memory allocation was needed.
3811
3812 Example usage:
3813
3814 Consider the following 8x8 matrix with 34 non-zero values, that is
3815 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3816 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3817 as follows:
3818
3819 .vb
3820 1 2 0 | 0 3 0 | 0 4
3821 Proc0 0 5 6 | 7 0 0 | 8 0
3822 9 0 10 | 11 0 0 | 12 0
3823 -------------------------------------
3824 13 0 14 | 15 16 17 | 0 0
3825 Proc1 0 18 0 | 19 20 21 | 0 0
3826 0 0 0 | 22 23 0 | 24 0
3827 -------------------------------------
3828 Proc2 25 26 27 | 0 0 28 | 29 0
3829 30 0 0 | 31 32 33 | 0 34
3830 .ve
3831
3832 This can be represented as a collection of submatrices as:
3833
3834 .vb
3835 A B C
3836 D E F
3837 G H I
3838 .ve
3839
3840 Where the submatrices A,B,C are owned by proc0, D,E,F are
3841 owned by proc1, G,H,I are owned by proc2.
3842
3843 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845 The 'M','N' parameters are 8,8, and have the same values on all procs.
3846
3847 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3849 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3852 matrix, and [DF] as another SeqAIJ matrix.
3853
3854 When d_nz, o_nz parameters are specified, d_nz storage elements are
3855 allocated for every row of the local diagonal submatrix, and o_nz
3856 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3857 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3858 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3859 In this case, the values of d_nz,o_nz are:
3860 .vb
3861 proc0 : d_nz = 2, o_nz = 2
3862 proc1 : d_nz = 3, o_nz = 2
3863 proc2 : d_nz = 1, o_nz = 4
3864 .ve
3865 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3868 34 values.
3869
3870 When d_nnz, o_nnz parameters are specified, the storage is specified
3871 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3872 In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3877 .ve
3878 Here the space allocated is the sum of all the above values, i.e., 34, and
3879 hence pre-allocation is perfect.
3880
3881 Level: intermediate
3882
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886 MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890 PetscErrorCode ierr;
3891
3892 PetscFunctionBegin;
3893 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3894 PetscFunctionReturn(0);
3895 }
3896
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3899 /*@
3900 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
3901 CSR format the local rows.
3902
3903 Collective on MPI_Comm
3904
3905 Input Parameters:
3906 + comm - MPI communicator
3907 . m - number of local rows (Cannot be PETSC_DECIDE)
3908 . n - This value should be the same as the local size used in creating the
3909 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3910 calculated if N is given) For square matrices n is almost always m.
3911 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3912 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3913 . i - row indices
3914 . j - column indices
3915 - a - matrix values
3916
3917 Output Parameter:
3918 . mat - the matrix
3919
3920 Level: intermediate
3921
3922 Notes:
3923 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3924 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3925 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3926
3927 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3928
3929 The format used for the sparse matrix input is equivalent to a
3930 row-major ordering,
i.e., for the following matrix, the input data expected is
3931 as shown:
3932
3933 1 0 0
3934 2 0 3 P0
3935 -------
3936 4 5 6 P1
3937
3938 Process0 [P0]: rows_owned=[0,1]
3939 i = {0,1,3} [size = nrow+1 = 2+1]
3940 j = {0,0,2} [size = nz = 3]
3941 v = {1,2,3} [size = nz = 3]
3942
3943 Process1 [P1]: rows_owned=[2]
3944 i = {0,3} [size = nrow+1 = 1+1]
3945 j = {0,1,2} [size = nz = 3]
3946 v = {4,5,6} [size = nz = 3]
3947
3948 .keywords: matrix, aij, compressed row, sparse, parallel
3949
3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3951 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3952 @*/
3953 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3954 {
3955 PetscErrorCode ierr;
3956
3957 PetscFunctionBegin;
3958 if (i[0]) {
3959 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3960 }
3961 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3962 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3963 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3964 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3965 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3966 PetscFunctionReturn(0);
3967 }
3968
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJ"
3971 /*@C
3972 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3973 (the default parallel PETSc format). For good matrix assembly performance
3974 the user should preallocate the matrix storage by setting the parameters
3975 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3976 performance can be increased by more than a factor of 50.
3977
3978 Collective on MPI_Comm
3979
3980 Input Parameters:
3981 + comm - MPI communicator
3982 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3983 This value should be the same as the local size used in creating the
3984 y vector for the matrix-vector product y = Ax.
3985 . n - This value should be the same as the local size used in creating the
3986 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3987 calculated if N is given) For square matrices n is almost always m.
3988 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3989 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3990 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3991 (same value is used for all local rows)
3992 . d_nnz - array containing the number of nonzeros in the various rows of the
3993 DIAGONAL portion of the local submatrix (possibly different for each row)
3994 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3995 The size of this array is equal to the number of local rows, i.e., 'm'.
3996 You must leave room for the diagonal entry even if it is zero.
3997 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3998 submatrix (same value is used for all local rows).
3999 - o_nnz - array containing the number of nonzeros in the various rows of the
4000 OFF-DIAGONAL portion of the local submatrix (possibly different for
4001 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4002 structure. The size of this array is equal to the number
4003 of local rows, i.e., 'm'.
4004
4005 Output Parameter:
4006 . A - the matrix
4007
4008 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4009 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4010 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4011
4012 Notes:
4013 If the *_nnz parameter is given then the *_nz parameter is ignored.
4014
4015 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4016 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4017 storage requirements for this matrix.
4018
4019 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4020 processor then it must be used on all processors that share the object for
4021 that argument.
4022
4023 The user MUST specify either the local or global matrix dimensions
4024 (possibly both).
4025
4026 The parallel matrix is partitioned across processors such that the
4027 first m0 rows belong to process 0, the next m1 rows belong to
4028 process 1, the next m2 rows belong to process 2, etc., where
4029 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4030 values corresponding to an [m x N] submatrix.
4031
4032 The columns are logically partitioned with the n0 columns belonging
4033 to the 0th partition, the next n1 columns belonging to the next
4034 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4035
4036 The DIAGONAL portion of the local submatrix on any given processor
4037 is the submatrix corresponding to the rows and columns m,n
4038 corresponding to the given processor, i.e., the diagonal submatrix on
4039 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4040 etc. The remaining portion of the local submatrix [m x (N-n)]
4041 constitutes the OFF-DIAGONAL portion. The example below better
4042 illustrates this concept.
4043
4044 For a square global matrix we define each processor's diagonal portion
4045 to be its local rows and the corresponding columns (a square submatrix);
4046 each processor's off-diagonal portion encompasses the remainder of the
4047 local matrix (a rectangular submatrix).
4048
4049 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4050
4051 When calling this routine with a single process communicator, a matrix of
4052 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4053 type of communicator, use the construction mechanism:
4054 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4055
4056 By default, this format uses inodes (identical nodes) when possible.
4057 We search for consecutive rows with the same nonzero structure, thereby
4058 reusing matrix information to achieve increased efficiency.
4059
4060 Options Database Keys:
4061 + -mat_no_inode - Do not use inodes
4062 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4063 - -mat_aij_oneindex - Internally use indexing starting at 1
4064 rather than 0. Note that when calling MatSetValues(),
4065 the user still MUST index entries starting at 0!
4066
4067
4068 Example usage:
4069
4070 Consider the following 8x8 matrix with 34 non-zero values, that is
4071 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4072 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
4073 as follows:
4074
4075 .vb
4076 1 2 0 | 0 3 0 | 0 4
4077 Proc0 0 5 6 | 7 0 0 | 8 0
4078 9 0 10 | 11 0 0 | 12 0
4079 -------------------------------------
4080 13 0 14 | 15 16 17 | 0 0
4081 Proc1 0 18 0 | 19 20 21 | 0 0
4082 0 0 0 | 22 23 0 | 24 0
4083 -------------------------------------
4084 Proc2 25 26 27 | 0 0 28 | 29 0
4085 30 0 0 | 31 32 33 | 0 34
4086 .ve
4087
4088 This can be represented as a collection of submatrices as:
4089
4090 .vb
4091 A B C
4092 D E F
4093 G H I
4094 .ve
4095
4096 Where the submatrices A,B,C are owned by proc0, D,E,F are
4097 owned by proc1, G,H,I are owned by proc2.
4098
4099 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4100 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4101 The 'M','N' parameters are 8,8, and have the same values on all procs.
4102
4103 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4104 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4105 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4106 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4107 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4108 matrix, and [DF] as another SeqAIJ matrix.
4109
4110 When d_nz, o_nz parameters are specified, d_nz storage elements are
4111 allocated for every row of the local diagonal submatrix, and o_nz
4112 storage locations are allocated for every row of the OFF-DIAGONAL submat.
4113 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4114 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4115 In this case, the values of d_nz,o_nz are:
4116 .vb
4117 proc0 : d_nz = 2, o_nz = 2
4118 proc1 : d_nz = 3, o_nz = 2
4119 proc2 : d_nz = 1, o_nz = 4
4120 .ve
4121 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4122 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4123 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4124 34 values.
4125
4126 When d_nnz, o_nnz parameters are specified, the storage is specified
4127 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4128 In the above case the values for d_nnz,o_nnz are:
4129 .vb
4130 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4131 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4132 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4133 .ve
4134 Here the space allocated is the sum of all the above values, i.e., 34, and
4135 hence pre-allocation is perfect.
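   A corresponding call on proc1 (a sketch; the other processes pass their own
   m, n, d_nnz, and o_nnz values from the tables above) would be:
.vb
      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd()
   calls on every process.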
4136 4137 Level: intermediate 4138 4139 .keywords: matrix, aij, compressed row, sparse, parallel 4140 4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4142 MPIAIJ, MatCreateMPIAIJWithArrays() 4143 @*/ 4144 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4145 { 4146 PetscErrorCode ierr; 4147 PetscMPIInt size; 4148 4149 PetscFunctionBegin; 4150 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4151 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4152 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4153 if (size > 1) { 4154 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4155 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4156 } else { 4157 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4158 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4159 } 4160 PetscFunctionReturn(0); 4161 } 4162 4163 #undef __FUNCT__ 4164 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4165 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4166 { 4167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4168 4169 PetscFunctionBegin; 4170 *Ad = a->A; 4171 *Ao = a->B; 4172 *colmap = a->garray; 4173 PetscFunctionReturn(0); 4174 } 4175 4176 #undef __FUNCT__ 4177 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4178 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4179 { 4180 PetscErrorCode ierr; 4181 PetscInt i; 4182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4183 4184 PetscFunctionBegin; 4185 if (coloring->ctype == IS_COLORING_GLOBAL) { 4186 ISColoringValue *allcolors,*colors; 4187 ISColoring ocoloring; 4188 4189 /* set coloring for diagonal portion */ 4190 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4191 4192 /* set coloring for off-diagonal portion */ 4193 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4194 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4195 for (i=0; i<a->B->cmap->n; i++) { 4196 colors[i] = allcolors[a->garray[i]]; 4197 } 4198 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4199 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4200 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4201 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4202 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4203 ISColoringValue *colors; 4204 PetscInt *larray; 4205 ISColoring ocoloring; 4206 4207 /* set coloring for diagonal portion */ 4208 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4209 for (i=0; i<a->A->cmap->n; i++) { 4210 larray[i] = i + A->cmap->rstart; 4211 } 4212 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4213 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4214 for (i=0; i<a->A->cmap->n; i++) { 4215 colors[i] = coloring->colors[larray[i]]; 4216 } 4217 ierr = PetscFree(larray);CHKERRQ(ierr); 4218 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4219 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4220 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4221 4222 /* set coloring for off-diagonal portion */ 4223 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4224 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4225 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4226 for (i=0; i<a->B->cmap->n; i++) {
4227 colors[i] = coloring->colors[larray[i]];
4228 }
4229 ierr = PetscFree(larray);CHKERRQ(ierr);
4230 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4231 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4232 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4233 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
4234
4235 PetscFunctionReturn(0);
4236 }
4237
4238 #if defined(PETSC_HAVE_ADIC)
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4241 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4242 {
4243 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4244 PetscErrorCode ierr;
4245
4246 PetscFunctionBegin;
4247 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4248 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4249 PetscFunctionReturn(0);
4250 }
4251 #endif
4252
4253 #undef __FUNCT__
4254 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4255 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4256 {
4257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4258 PetscErrorCode ierr;
4259
4260 PetscFunctionBegin;
4261 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4262 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4263 PetscFunctionReturn(0);
4264 }
4265
4266 #undef __FUNCT__
4267 #define __FUNCT__ "MatMerge"
4268 /*@
4269 MatMerge - Creates a single large PETSc matrix by concatenating sequential
4270 matrices from each processor
4271
4272 Collective on MPI_Comm
4273
4274 Input Parameters:
4275 + comm - the communicator the parallel matrix will live on
4276 . inmat - the input sequential matrix (one per process)
4277 . n - number of local columns (or PETSC_DECIDE)
4278 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4279
4280 Output Parameter:
4281 . outmat - the parallel matrix generated
4282
4283 Level: advanced
4284
4285 Notes: The number of columns of the matrix in EACH processor MUST be the same. This routine takes ownership of inmat and destroys it before returning, so the caller must not destroy it again.
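   Example usage (a sketch): each process assembles its local rows as a SeqAIJ
   matrix and the pieces are then stacked into one parallel matrix:
.vb
      Mat seqA,A;
      ... assemble seqA on PETSC_COMM_SELF with the same number of columns on every process ...
      ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve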
4286 4287 @*/ 4288 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4289 { 4290 PetscErrorCode ierr; 4291 PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz; 4292 PetscInt *indx; 4293 PetscScalar *values; 4294 4295 PetscFunctionBegin; 4296 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4297 if (scall == MAT_INITIAL_MATRIX){ 4298 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 4299 if (n == PETSC_DECIDE){ 4300 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4301 } 4302 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4303 rstart -= m; 4304 4305 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4306 for (i=0;i<m;i++) { 4307 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4308 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4309 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4310 } 4311 /* This routine will ONLY return MPIAIJ type matrix */ 4312 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4313 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4314 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4315 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4316 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4317 4318 } else if (scall == MAT_REUSE_MATRIX){ 4319 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4320 } else { 4321 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4322 } 4323 4324 for (i=0;i<m;i++) { 4325 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4326 Ii = i + rstart; 4327 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4328 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4329 } 4330 ierr = MatDestroy(&inmat);CHKERRQ(ierr); 4331 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4332 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4333 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatFileSplit" 4339 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4340 { 4341 PetscErrorCode ierr; 4342 PetscMPIInt rank; 4343 PetscInt m,N,i,rstart,nnz; 4344 size_t len; 4345 const PetscInt *indx; 4346 PetscViewer out; 4347 char *name; 4348 Mat B; 4349 const PetscScalar *values; 4350 4351 PetscFunctionBegin; 4352 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4353 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4354 /* Should this be the type of the diagonal block of A? 
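For now it is hardwired to MATSEQAIJ: each process writes its full local rows (an m by N sequential matrix) to its own binary file named <outfile>.<rank>.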
*/ 4355 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4356 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4357 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4358 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4359 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4360 for (i=0;i<m;i++) { 4361 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4362 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4363 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4364 } 4365 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4366 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4367 4368 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4369 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4370 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4371 sprintf(name,"%s.%d",outfile,rank); 4372 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4373 ierr = PetscFree(name); 4374 ierr = MatView(B,out);CHKERRQ(ierr); 4375 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4376 ierr = MatDestroy(&B);CHKERRQ(ierr); 4377 PetscFunctionReturn(0); 4378 } 4379 4380 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4383 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4384 { 4385 PetscErrorCode ierr; 4386 Mat_Merge_SeqsToMPI *merge; 4387 PetscContainer container; 4388 4389 PetscFunctionBegin; 4390 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4391 if (container) { 4392 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4393 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4394 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4395 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4396 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4397 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4398 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4399 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4400 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4401 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4402 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4403 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4404 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4405 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4406 ierr = PetscFree(merge);CHKERRQ(ierr); 4407 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4408 } 4409 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 #include <../src/mat/utils/freespace.h> 4414 #include <petscbt.h> 4415 4416 #undef __FUNCT__ 4417 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4418 /*@C 4419 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4420 matrices from each processor 4421 4422 Collective on MPI_Comm 4423 4424 Input Parameters: 4425 + comm - the communicators the parallel matrix will live on 4426 . seqmat - the input sequential matrices 4427 . m - number of local rows (or PETSC_DECIDE) 4428 . n - number of local columns (or PETSC_DECIDE) 4429 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4430 4431 Output Parameter: 4432 . mpimat - the parallel matrix generated 4433 4434 Level: advanced 4435 4436 Notes: 4437 The dimensions of the sequential matrix in each processor MUST be the same. 4438 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4439 destroyed when mpimat is destroyed. 
Call PetscObjectQuery() to access seqmat. 4440 @*/ 4441 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4442 { 4443 PetscErrorCode ierr; 4444 MPI_Comm comm=((PetscObject)mpimat)->comm; 4445 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4446 PetscMPIInt size,rank,taga,*len_s; 4447 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4448 PetscInt proc,m; 4449 PetscInt **buf_ri,**buf_rj; 4450 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4451 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4452 MPI_Request *s_waits,*r_waits; 4453 MPI_Status *status; 4454 MatScalar *aa=a->a; 4455 MatScalar **abuf_r,*ba_i; 4456 Mat_Merge_SeqsToMPI *merge; 4457 PetscContainer container; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4461 4462 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4463 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4464 4465 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4466 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4467 4468 bi = merge->bi; 4469 bj = merge->bj; 4470 buf_ri = merge->buf_ri; 4471 buf_rj = merge->buf_rj; 4472 4473 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4474 owners = merge->rowmap->range; 4475 len_s = merge->len_s; 4476 4477 /* send and recv matrix values */ 4478 /*-----------------------------*/ 4479 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4480 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4481 4482 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4483 for (proc=0,k=0; proc<size; proc++){ 4484 if (!len_s[proc]) continue; 4485 i = owners[proc]; 4486 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4487 k++; 4488 } 4489 4490 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4491 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4492 ierr = PetscFree(status);CHKERRQ(ierr); 4493 4494 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4495 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4496 4497 /* insert mat values of mpimat */ 4498 /*----------------------------*/ 4499 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4500 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4501 4502 for (k=0; k<merge->nrecv; k++){ 4503 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4504 nrows = *(buf_ri_k[k]); 4505 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4506 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4507 } 4508 4509 /* set values of ba */ 4510 m = merge->rowmap->n; 4511 for (i=0; i<m; i++) { 4512 arow = owners[rank] + i; 4513 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4514 bnzi = bi[i+1] - bi[i]; 4515 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4516 4517 /* add local non-zero vals of this proc's seqmat into ba */ 4518 anzi = ai[arow+1] - ai[arow]; 4519 aj = a->j + ai[arow]; 4520 aa = a->a + ai[arow]; 4521 nextaj = 0; 4522 for (j=0; nextaj<anzi; j++){ 4523 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4524 ba_i[j] += aa[nextaj++]; 4525 } 4526 } 4527 4528 /* add received vals into ba */ 4529 for (k=0; k<merge->nrecv; k++){ /* k-th 
received message */ 4530 /* i-th row */ 4531 if (i == *nextrow[k]) { 4532 anzi = *(nextai[k]+1) - *nextai[k]; 4533 aj = buf_rj[k] + *(nextai[k]); 4534 aa = abuf_r[k] + *(nextai[k]); 4535 nextaj = 0; 4536 for (j=0; nextaj<anzi; j++){ 4537 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4538 ba_i[j] += aa[nextaj++]; 4539 } 4540 } 4541 nextrow[k]++; nextai[k]++; 4542 } 4543 } 4544 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4545 } 4546 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4547 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4548 4549 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4550 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4551 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4552 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4553 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4554 PetscFunctionReturn(0); 4555 } 4556 4557 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4558 4559 #undef __FUNCT__ 4560 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4561 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4562 { 4563 PetscErrorCode ierr; 4564 Mat B_mpi; 4565 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4566 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4567 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4568 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4569 PetscInt len,proc,*dnz,*onz; 4570 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4571 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4572 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4573 MPI_Status *status; 4574 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4575 PetscBT lnkbt; 4576 Mat_Merge_SeqsToMPI *merge; 4577 PetscContainer container; 4578 4579 PetscFunctionBegin; 4580 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4581 4582 /* make sure it is a PETSc comm */ 4583 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4584 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4585 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4586 4587 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4588 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4589 4590 /* determine row ownership */ 4591 /*---------------------------------------------------------*/ 4592 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4593 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4594 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4595 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4596 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4597 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4598 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4599 4600 m = merge->rowmap->n; 4601 M = merge->rowmap->N; 4602 owners = merge->rowmap->range; 4603 4604 /* determine the number of messages to send, their lengths */ 4605 /*---------------------------------------------------------*/ 4606 len_s = merge->len_s; 4607 4608 len = 0; /* length of buf_si[] */ 4609 merge->nsend = 0; 4610 for (proc=0; proc<size; proc++){ 4611 len_si[proc] = 0; 4612 if (proc == rank){ 4613 len_s[proc] = 0; 4614 } else { 4615 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4616 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */ 4617 } 4618 if (len_s[proc]) { 4619 
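/* data will be sent to [proc]: count the rows that contain nonzeros, so the i-structure message can be sized as len_si[proc] = 2*(nrows+1) entries, i.e. the row count, nrows row indices, and nrows+1 row offsets (see the outgoing message format described below) */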
merge->nsend++; 4620 nrows = 0; 4621 for (i=owners[proc]; i<owners[proc+1]; i++){ 4622 if (ai[i+1] > ai[i]) nrows++; 4623 } 4624 len_si[proc] = 2*(nrows+1); 4625 len += len_si[proc]; 4626 } 4627 } 4628 4629 /* determine the number and length of messages to receive for ij-structure */ 4630 /*-------------------------------------------------------------------------*/ 4631 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4632 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4633 4634 /* post the Irecv of j-structure */ 4635 /*-------------------------------*/ 4636 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4637 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4638 4639 /* post the Isend of j-structure */ 4640 /*--------------------------------*/ 4641 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4642 4643 for (proc=0, k=0; proc<size; proc++){ 4644 if (!len_s[proc]) continue; 4645 i = owners[proc]; 4646 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4647 k++; 4648 } 4649 4650 /* receives and sends of j-structure are complete */ 4651 /*------------------------------------------------*/ 4652 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4653 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4654 4655 /* send and recv i-structure */ 4656 /*---------------------------*/ 4657 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4658 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4659 4660 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4661 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4662 for (proc=0,k=0; proc<size; proc++){ 4663 if (!len_s[proc]) continue; 4664 /* form outgoing message for i-structure: 4665 buf_si[0]: nrows to be sent 4666 [1:nrows]: row index (local to the receiving process) 4667 [nrows+1:2*nrows+1]: i-structure index 4668 */ 4669 /*-------------------------------------------*/ 4670 nrows = len_si[proc]/2 - 1; 4671 buf_si_i = buf_si + nrows+1; 4672 buf_si[0] = nrows; 4673 buf_si_i[0] = 0; 4674 nrows = 0; 4675 for (i=owners[proc]; i<owners[proc+1]; i++){ 4676 anzi = ai[i+1] - ai[i]; 4677 if (anzi) { 4678 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4679 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4680 nrows++; 4681 } 4682 } 4683 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4684 k++; 4685 buf_si += len_si[proc]; 4686 } 4687 4688 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4689 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4690 4691 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4692 for (i=0; i<merge->nrecv; i++){ 4693 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4694 } 4695 4696 ierr = PetscFree(len_si);CHKERRQ(ierr); 4697 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4698 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4699 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4700 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4701 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4702 ierr = 
PetscFree(status);CHKERRQ(ierr); 4703 4704 /* compute a local seq matrix in each processor */ 4705 /*----------------------------------------------*/ 4706 /* allocate bi array and free space for accumulating nonzero column info */ 4707 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4708 bi[0] = 0; 4709 4710 /* create and initialize a linked list */ 4711 nlnk = N+1; 4712 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4713 4714 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4715 len = 0; 4716 len = ai[owners[rank+1]] - ai[owners[rank]]; 4717 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4718 current_space = free_space; 4719 4720 /* determine symbolic info for each local row */ 4721 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4722 4723 for (k=0; k<merge->nrecv; k++){ 4724 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4725 nrows = *buf_ri_k[k]; 4726 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4727 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4728 } 4729 4730 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4731 len = 0; 4732 for (i=0;i<m;i++) { 4733 bnzi = 0; 4734 /* add local non-zero cols of this proc's seqmat into lnk */ 4735 arow = owners[rank] + i; 4736 anzi = ai[arow+1] - ai[arow]; 4737 aj = a->j + ai[arow]; 4738 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4739 bnzi += nlnk; 4740 /* add received col data into lnk */ 4741 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4742 if (i == *nextrow[k]) { /* i-th row */ 4743 anzi = *(nextai[k]+1) - *nextai[k]; 4744 aj = buf_rj[k] + *nextai[k]; 4745 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4746 bnzi += nlnk; 4747 nextrow[k]++; nextai[k]++; 4748 } 4749 } 4750 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4751 4752 /* if free space is not available, make more free space */ 4753 if (current_space->local_remaining<bnzi) { 4754 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4755 nspacedouble++; 4756 } 4757 /* copy data into free space, then initialize lnk */ 4758 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4759 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4760 4761 current_space->array += bnzi; 4762 current_space->local_used += bnzi; 4763 current_space->local_remaining -= bnzi; 4764 4765 bi[i+1] = bi[i] + bnzi; 4766 } 4767 4768 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4769 4770 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4771 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4772 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4773 4774 /* create symbolic parallel matrix B_mpi */ 4775 /*---------------------------------------*/ 4776 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4777 if (n==PETSC_DECIDE) { 4778 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4779 } else { 4780 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4781 } 4782 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4783 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4784 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4785 4786 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4787 
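/*
   A typical calling sequence (an illustrative sketch, not taken from the manual pages): each process builds a fully assembled SeqAIJ matrix seqmat, then

      Mat C;
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,C);CHKERRQ(ierr);

   or, equivalently, MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C), defined below. The Mat_Merge_SeqsToMPI struct composed on B_mpi in a PetscContainer below is what lets the numeric phase reuse the symbolic bi/bj arrays and the communication buffers.
*/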
B_mpi->assembled = PETSC_FALSE; 4788 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4789 merge->bi = bi; 4790 merge->bj = bj; 4791 merge->buf_ri = buf_ri; 4792 merge->buf_rj = buf_rj; 4793 merge->coi = PETSC_NULL; 4794 merge->coj = PETSC_NULL; 4795 merge->owners_co = PETSC_NULL; 4796 4797 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4798 4799 /* attach the supporting struct to B_mpi for reuse */ 4800 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4801 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4802 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4803 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4804 *mpimat = B_mpi; 4805 4806 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4807 PetscFunctionReturn(0); 4808 } 4809 4810 #undef __FUNCT__ 4811 #define __FUNCT__ "MatMerge_SeqsToMPI" 4812 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4813 { 4814 PetscErrorCode ierr; 4815 4816 PetscFunctionBegin; 4817 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4818 if (scall == MAT_INITIAL_MATRIX){ 4819 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4820 } 4821 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4822 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4823 PetscFunctionReturn(0); 4824 } 4825 4826 #undef __FUNCT__ 4827 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4828 /*@ 4829 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4830 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4831 with MatGetSize() 4832 4833 Not Collective 4834 4835 Input Parameters: 4836 + A - the matrix 4837 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4838 4839 Output Parameter: 4840 . 
A_loc - the local sequential matrix generated 4841 4842 Level: developer 4843 4844 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4845 4846 @*/ 4847 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4848 { 4849 PetscErrorCode ierr; 4850 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4851 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4852 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4853 MatScalar *aa=a->a,*ba=b->a,*cam; 4854 PetscScalar *ca; 4855 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4856 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4857 PetscBool match; 4858 4859 PetscFunctionBegin; 4860 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4861 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4862 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4863 if (scall == MAT_INITIAL_MATRIX){ 4864 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4865 ci[0] = 0; 4866 for (i=0; i<am; i++){ 4867 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4868 } 4869 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4870 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4871 k = 0; 4872 for (i=0; i<am; i++) { 4873 ncols_o = bi[i+1] - bi[i]; 4874 ncols_d = ai[i+1] - ai[i]; 4875 /* off-diagonal portion of A */ 4876 for (jo=0; jo<ncols_o; jo++) { 4877 col = cmap[*bj]; 4878 if (col >= cstart) break; 4879 cj[k] = col; bj++; 4880 ca[k++] = *ba++; 4881 } 4882 /* diagonal portion of A */ 4883 for (j=0; j<ncols_d; j++) { 4884 cj[k] = cstart + *aj++; 4885 ca[k++] = *aa++; 4886 } 4887 /* off-diagonal portion of A */ 4888 for (j=jo; j<ncols_o; j++) { 4889 cj[k] = cmap[*bj++]; 4890 ca[k++] = *ba++; 4891 } 4892 } 4893 /* put together the new matrix */ 4894 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4895 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4896 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4897 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4898 mat->free_a = PETSC_TRUE; 4899 mat->free_ij = PETSC_TRUE; 4900 mat->nonew = 0; 4901 } else if (scall == MAT_REUSE_MATRIX){ 4902 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4903 ci = mat->i; cj = mat->j; cam = mat->a; 4904 for (i=0; i<am; i++) { 4905 /* off-diagonal portion of A */ 4906 ncols_o = bi[i+1] - bi[i]; 4907 for (jo=0; jo<ncols_o; jo++) { 4908 col = cmap[*bj]; 4909 if (col >= cstart) break; 4910 *cam++ = *ba++; bj++; 4911 } 4912 /* diagonal portion of A */ 4913 ncols_d = ai[i+1] - ai[i]; 4914 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4915 /* off-diagonal portion of A */ 4916 for (j=jo; j<ncols_o; j++) { 4917 *cam++ = *ba++; bj++; 4918 } 4919 } 4920 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4921 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4922 PetscFunctionReturn(0); 4923 } 4924 4925 #undef __FUNCT__ 4926 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4927 /*@C 4928 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4929 4930 Not Collective 4931 4932 Input Parameters: 4933 + A - the matrix 4934 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4935 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4936 4937 Output Parameter: 4938 . 
A_loc - the local sequential matrix generated 4939 4940 Level: developer 4941 4942 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4943 4944 @*/ 4945 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4946 { 4947 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4948 PetscErrorCode ierr; 4949 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4950 IS isrowa,iscola; 4951 Mat *aloc; 4952 PetscBool match; 4953 4954 PetscFunctionBegin; 4955 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4956 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4957 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4958 if (!row){ 4959 start = A->rmap->rstart; end = A->rmap->rend; 4960 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4961 } else { 4962 isrowa = *row; 4963 } 4964 if (!col){ 4965 start = A->cmap->rstart; 4966 cmap = a->garray; 4967 nzA = a->A->cmap->n; 4968 nzB = a->B->cmap->n; 4969 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 4970 ncols = 0; 4971 for (i=0; i<nzB; i++) { 4972 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4973 else break; 4974 } 4975 imark = i; 4976 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4977 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4978 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4979 } else { 4980 iscola = *col; 4981 } 4982 if (scall != MAT_INITIAL_MATRIX){ 4983 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4984 aloc[0] = *A_loc; 4985 } 4986 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4987 *A_loc = aloc[0]; 4988 ierr = PetscFree(aloc);CHKERRQ(ierr); 4989 if (!row){ 4990 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4991 } 4992 if (!col){ 4993 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4994 } 4995 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4996 PetscFunctionReturn(0); 4997 } 4998 4999 #undef __FUNCT__ 5000 #define __FUNCT__ "MatGetBrowsOfAcols" 5001 /*@C 5002 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5003 5004 Collective on Mat 5005 5006 Input Parameters: 5007 + A,B - the matrices in mpiaij format 5008 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5009 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5010 5011 Output Parameter: 5012 + rowb, colb - index sets of rows and columns of B to extract 5013 . 
brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows 5014 - B_seq - the sequential matrix generated 5015 5016 Level: developer 5017 5018 @*/ 5019 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 5020 { 5021 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5022 PetscErrorCode ierr; 5023 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5024 IS isrowb,iscolb; 5025 Mat *bseq; 5026 5027 PetscFunctionBegin; 5028 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5029 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5030 } 5031 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5032 5033 if (scall == MAT_INITIAL_MATRIX){ 5034 start = A->cmap->rstart; 5035 cmap = a->garray; 5036 nzA = a->A->cmap->n; 5037 nzB = a->B->cmap->n; 5038 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5039 ncols = 0; 5040 for (i=0; i<nzB; i++) { /* row < local row index */ 5041 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5042 else break; 5043 } 5044 imark = i; 5045 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5046 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5047 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5048 *brstart = imark; 5049 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5050 } else { 5051 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5052 isrowb = *rowb; iscolb = *colb; 5053 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5054 bseq[0] = *B_seq; 5055 } 5056 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5057 *B_seq = bseq[0]; 5058 ierr = PetscFree(bseq);CHKERRQ(ierr); 5059 if (!rowb){ 5060 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5061 } else { 5062 *rowb = isrowb; 5063 } 5064 if (!colb){ 5065 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5066 } else { 5067 *colb = iscolb; 5068 } 5069 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5070 PetscFunctionReturn(0); 5071 } 5072 5073 #undef __FUNCT__ 5074 #define __FUNCT__ "MatGetBrowsOfAoCols" 5075 /*@C 5076 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5077 of the OFF-DIAGONAL portion of local A 5078 5079 Collective on Mat 5080 5081 Input Parameters: 5082 + A,B - the matrices in mpiaij format 5083 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5084 . startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5085 . 
startsj_r - similar to startsj for receives 5086 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5087 5088 Output Parameter: 5089 . B_oth - the sequential matrix generated 5090 5091 Level: developer 5092 5093 @*/ 5094 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5095 { 5096 VecScatter_MPI_General *gen_to,*gen_from; 5097 PetscErrorCode ierr; 5098 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5099 Mat_SeqAIJ *b_oth; 5100 VecScatter ctx=a->Mvctx; 5101 MPI_Comm comm=((PetscObject)ctx)->comm; 5102 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5103 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5104 PetscScalar *rvalues,*svalues; 5105 MatScalar *b_otha,*bufa,*bufA; 5106 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5107 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5108 MPI_Status *sstatus,rstatus; 5109 PetscMPIInt jj; 5110 PetscInt *cols,sbs,rbs; 5111 PetscScalar *vals; 5112 5113 PetscFunctionBegin; 5114 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5115 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5116 } 5117 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5118 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5119 5120 gen_to = (VecScatter_MPI_General*)ctx->todata; 5121 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5122 rvalues = gen_from->values; /* holds the lengths of the rows to be received */ 5123 svalues = gen_to->values; /* holds the lengths of the rows to be sent */ 5124 nrecvs = gen_from->n; 5125 nsends = gen_to->n; 5126 5127 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5128 srow = gen_to->indices; /* local row index to be sent */ 5129 sstarts = gen_to->starts; 5130 sprocs = gen_to->procs; 5131 sstatus = gen_to->sstatus; 5132 sbs = gen_to->bs; 5133 rstarts = gen_from->starts; 5134 rprocs = gen_from->procs; 5135 rbs = gen_from->bs; 5136 5137 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5138 if (scall == MAT_INITIAL_MATRIX){ 5139 /* i-array */ 5140 /*---------*/ 5141 /* post receives */ 5142 for (i=0; i<nrecvs; i++){ 5143 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5144 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5145 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5146 } 5147 5148 /* pack the outgoing message */ 5149 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5150 sstartsj[0] = 0; rstartsj[0] = 0; 5151 len = 0; /* total length of j or a array to be sent */ 5152 k = 0; 5153 for (i=0; i<nsends; i++){ 5154 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5155 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5156 for (j=0; j<nrows; j++) { 5157 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5158 for (l=0; l<sbs; l++){ 5159 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5160 rowlen[j*sbs+l] = ncols; 5161 len += ncols; 5162 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5163 } 5164 k++; 5165 } 5166 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5167 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */ 5168 } 5169 /* recvs and sends of i-array are completed */ 5170 i = nrecvs; 5171 while (i--) { 5172 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5173 } 5174 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5175 5176 /* allocate buffers for sending j and a arrays */ 5177 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5178 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5179 5180 /* create i-array of B_oth */ 5181 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5182 b_othi[0] = 0; 5183 len = 0; /* total length of j or a array to be received */ 5184 k = 0; 5185 for (i=0; i<nrecvs; i++){ 5186 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5187 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5188 for (j=0; j<nrows; j++) { 5189 b_othi[k+1] = b_othi[k] + rowlen[j]; 5190 len += rowlen[j]; k++; 5191 } 5192 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5193 } 5194 5195 /* allocate space for j and a arrays of B_oth */ 5196 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5197 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5198 5199 /* j-array */ 5200 /*---------*/ 5201 /* post receives of j-array */ 5202 for (i=0; i<nrecvs; i++){ 5203 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5204 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5205 } 5206 5207 /* pack the outgoing message j-array */ 5208 k = 0; 5209 for (i=0; i<nsends; i++){ 5210 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5211 bufJ = bufj+sstartsj[i]; 5212 for (j=0; j<nrows; j++) { 5213 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5214 for (ll=0; ll<sbs; ll++){ 5215 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5216 for (l=0; l<ncols; l++){ 5217 *bufJ++ = cols[l]; 5218 } 5219 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5220 } 5221 } 5222 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5223 } 5224 5225 /* recvs and sends of j-array are completed */ 5226 i = nrecvs; 5227 while (i--) { 5228 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5229 } 5230 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5231 } else if (scall == MAT_REUSE_MATRIX){ 5232 sstartsj = *startsj; 5233 rstartsj = *startsj_r; 5234 bufa = *bufa_ptr; 5235 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5236 b_otha = b_oth->a; 5237 } else { 5238 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5239 } 5240 5241 /* a-array */ 5242 /*---------*/ 5243 /* post receives of a-array */ 5244 for (i=0; i<nrecvs; i++){ 5245 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5246 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5247 } 5248 5249 /* pack the outgoing message a-array */ 5250 k = 0; 5251 for (i=0; i<nsends; i++){ 5252 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5253 bufA = bufa+sstartsj[i]; 5254 for (j=0; j<nrows; j++) { 5255 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5256 for (ll=0; ll<sbs; ll++){ 5257 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5258 for (l=0; l<ncols; l++){ 5259 *bufA++ = vals[l]; 5260 } 5261 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5262 } 5263 } 5264 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5265 } 5266 /* recvs and sends of a-array are completed */ 5267 i = nrecvs; 5268 while (i--) { 5269 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5270 } 5271 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5272 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5273 5274 if (scall == MAT_INITIAL_MATRIX){ 5275 /* put together the new matrix */ 5276 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5277 5278 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5279 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5280 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5281 b_oth->free_a = PETSC_TRUE; 5282 b_oth->free_ij = PETSC_TRUE; 5283 b_oth->nonew = 0; 5284 5285 ierr = PetscFree(bufj);CHKERRQ(ierr); 5286 if (!startsj || !bufa_ptr){ 5287 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5288 ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so free it here */ 5289 } else { 5290 *startsj = sstartsj; 5291 *startsj_r = rstartsj; 5292 *bufa_ptr = bufa; 5293 } 5294 } 5295 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5296 PetscFunctionReturn(0); 5297 } 5298 5299 #undef __FUNCT__ 5300 #define __FUNCT__ "MatGetCommunicationStructs" 5301 /*@C 5302 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5303 5304 Not Collective 5305 5306 Input Parameters: 5307 . A - The matrix in mpiaij format 5308 5309 Output Parameter: 5310 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5311 . 
colmap - A map from global column index to local index into lvec 5312 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5313 5314 Level: developer 5315 5316 @*/ 5317 #if defined (PETSC_USE_CTABLE) 5318 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5319 #else 5320 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5321 #endif 5322 { 5323 Mat_MPIAIJ *a; 5324 5325 PetscFunctionBegin; 5326 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5327 PetscValidPointer(lvec, 2); 5328 PetscValidPointer(colmap, 3); 5329 PetscValidPointer(multScatter, 4); 5330 a = (Mat_MPIAIJ *) A->data; 5331 if (lvec) *lvec = a->lvec; 5332 if (colmap) *colmap = a->colmap; 5333 if (multScatter) *multScatter = a->Mvctx; 5334 PetscFunctionReturn(0); 5335 } 5336 5337 EXTERN_C_BEGIN 5338 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5339 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5340 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5341 EXTERN_C_END 5342 5343 #undef __FUNCT__ 5344 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5345 /* 5346 Computes C = A*B as (B'*A')' since computing A*B directly is untenable 5347 5348 n p p 5349 ( ) ( ) ( ) 5350 m ( A ) * n ( B ) = m ( C ) 5351 ( ) ( ) ( ) 5352 5353 */ 5354 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5355 { 5356 PetscErrorCode ierr; 5357 Mat At,Bt,Ct; 5358 5359 PetscFunctionBegin; 5360 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5361 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5362 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5363 ierr = MatDestroy(&At);CHKERRQ(ierr); 5364 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5365 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5366 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5367 PetscFunctionReturn(0); 5368 } 5369 5370 #undef __FUNCT__ 5371 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5372 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5373 { 5374 PetscErrorCode ierr; 5375 PetscInt m=A->rmap->n,n=B->cmap->n; 5376 Mat Cmat; 5377 5378 PetscFunctionBegin; 5379 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5380 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5381 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5382 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5383 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5384 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5385 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5386 *C = Cmat; 5387 PetscFunctionReturn(0); 5388 } 5389 5390 /* ----------------------------------------------------------------*/ 5391 #undef __FUNCT__ 5392 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5393 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5394 { 5395 PetscErrorCode ierr; 5396 5397 PetscFunctionBegin; 5398 if (scall == MAT_INITIAL_MATRIX){ 5399 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5400 } 5401 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5402 PetscFunctionReturn(0); 5403 } 5404 5405 EXTERN_C_BEGIN 5406 #if defined(PETSC_HAVE_MUMPS) 5407 extern PetscErrorCode 
MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5408 #endif 5409 #if defined(PETSC_HAVE_PASTIX) 5410 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5411 #endif 5412 #if defined(PETSC_HAVE_SUPERLU_DIST) 5413 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5414 #endif 5415 #if defined(PETSC_HAVE_SPOOLES) 5416 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5417 #endif 5418 EXTERN_C_END 5419 5420 /*MC 5421 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5422 5423 Options Database Keys: 5424 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5425 5426 Level: beginner 5427 5428 .seealso: MatCreateMPIAIJ() 5429 M*/ 5430 5431 EXTERN_C_BEGIN 5432 #undef __FUNCT__ 5433 #define __FUNCT__ "MatCreate_MPIAIJ" 5434 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5435 { 5436 Mat_MPIAIJ *b; 5437 PetscErrorCode ierr; 5438 PetscMPIInt size; 5439 5440 PetscFunctionBegin; 5441 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5442 5443 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5444 B->data = (void*)b; 5445 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5446 B->rmap->bs = 1; 5447 B->assembled = PETSC_FALSE; 5448 5449 B->insertmode = NOT_SET_VALUES; 5450 b->size = size; 5451 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5452 5453 /* build cache for off array entries formed */ 5454 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5455 b->donotstash = PETSC_FALSE; 5456 b->colmap = 0; 5457 b->garray = 0; 5458 b->roworiented = PETSC_TRUE; 5459 5460 /* stuff used for matrix vector multiply */ 5461 b->lvec = PETSC_NULL; 5462 b->Mvctx = PETSC_NULL; 5463 5464 /* stuff for MatGetRow() */ 5465 b->rowindices = 0; 5466 b->rowvalues = 0; 5467 b->getrowactive = PETSC_FALSE; 5468 5469 #if defined(PETSC_HAVE_SPOOLES) 5470 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5471 "MatGetFactor_mpiaij_spooles", 5472 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5473 #endif 5474 #if defined(PETSC_HAVE_MUMPS) 5475 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5476 "MatGetFactor_aij_mumps", 5477 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5478 #endif 5479 #if defined(PETSC_HAVE_PASTIX) 5480 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5481 "MatGetFactor_mpiaij_pastix", 5482 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5483 #endif 5484 #if defined(PETSC_HAVE_SUPERLU_DIST) 5485 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5486 "MatGetFactor_mpiaij_superlu_dist", 5487 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5488 #endif 5489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5490 "MatStoreValues_MPIAIJ", 5491 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5493 "MatRetrieveValues_MPIAIJ", 5494 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5496 "MatGetDiagonalBlock_MPIAIJ", 5497 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5499 "MatIsTranspose_MPIAIJ", 5500 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5501 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5502 "MatMPIAIJSetPreallocation_MPIAIJ", 5503 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5505 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5506 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5508 "MatDiagonalScaleLocal_MPIAIJ", 5509 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5511 "MatConvert_MPIAIJ_MPIAIJPERM", 5512 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5514 "MatConvert_MPIAIJ_MPIAIJCRL", 5515 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5516 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5517 "MatConvert_MPIAIJ_MPISBAIJ", 5518 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5520 "MatMatMult_MPIDense_MPIAIJ", 5521 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5522 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5523 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5524 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5526 "MatMatMultNumeric_MPIDense_MPIAIJ", 5527 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5528 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5529 PetscFunctionReturn(0); 5530 } 5531 EXTERN_C_END 5532 5533 #undef __FUNCT__ 5534 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5535 /*@ 5536 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5537 and "off-diagonal" part of the matrix in CSR format. 5538 5539 Collective on MPI_Comm 5540 5541 Input Parameters: 5542 + comm - MPI communicator 5543 . m - number of local rows (Cannot be PETSC_DECIDE) 5544 . n - This value should be the same as the local size used in creating the 5545 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5546 it calculated if N is given). For square matrices n is almost always m. 5547 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5548 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5549 . i - row indices for "diagonal" portion of matrix 5550 . j - column indices 5551 . a - matrix values 5552 . oi - row indices for "off-diagonal" portion of matrix 5553 . oj - column indices 5554 - oa - matrix values 5555 5556 Output Parameter: 5557 . mat - the matrix 5558 5559 Level: advanced 5560 5561 Notes: 5562 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5563 must free the arrays once the matrix has been destroyed and not before. 5564 5565 The i and j indices are 0 based 5566 5567 See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix 5568 5569 This sets local rows and cannot be used to set off-processor values. 5570 5571 You cannot later use MatSetValues() to change values in this matrix.
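   Example usage (a minimal sketch, not from the distributed manual pages: assumes two MPI processes, the global 2x2 matrix [1 2; 3 4] stored one row per process, and rank, comm, mat, and ierr supplied by the caller; as the code below implies, the j indices are local to the "diagonal" block while the oj indices are global, and the arrays must remain valid until the matrix is destroyed):

$     PetscInt    i[2]  = {0,1}, j[1]  = {0};  /* one entry in the "diagonal" block */
$     PetscInt    oi[2] = {0,1}, oj[1];        /* one entry in the "off-diagonal" part */
$     PetscScalar a[1],oa[1];
$     if (!rank) {a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;} /* row 0: [1 2] */
$     else       {a[0] = 4.0; oj[0] = 0; oa[0] = 3.0;} /* row 1: [3 4] */
$     ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);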
5572 5573 .keywords: matrix, aij, compressed row, sparse, parallel 5574 5575 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5576 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5577 @*/ 5578 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5579 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5580 { 5581 PetscErrorCode ierr; 5582 Mat_MPIAIJ *maij; 5583 5584 PetscFunctionBegin; 5585 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5586 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5587 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5588 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5589 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5590 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5591 maij = (Mat_MPIAIJ*) (*mat)->data; 5592 maij->donotstash = PETSC_TRUE; 5593 (*mat)->preallocated = PETSC_TRUE; 5594 5595 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5596 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5597 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5598 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5599 5600 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5601 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5602 5603 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5604 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5605 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5606 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5607 5608 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5609 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5610 PetscFunctionReturn(0); 5611 } 5612 5613 /* 5614 Special version for direct calls from Fortran 5615 */ 5616 #include <private/fortranimpl.h> 5617 5618 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5619 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5620 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5621 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5622 #endif 5623 5624 /* Change these macros so they can be used in a void function */ 5625 #undef CHKERRQ 5626 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5627 #undef SETERRQ2 5628 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5629 #undef SETERRQ 5630 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5631 5632 EXTERN_C_BEGIN 5633 #undef __FUNCT__ 5634 #define __FUNCT__ "matsetvaluesmpiaij_" 5635 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5636 { 5637 Mat mat = *mmat; 5638 PetscInt m = *mm, n = *mn; 5639 InsertMode addv = *maddv; 5640 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5641 PetscScalar value; 5642 PetscErrorCode ierr; 5643 5644 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5645 if (mat->insertmode == NOT_SET_VALUES) { 5646 mat->insertmode = addv; 5647 } 5648 #if defined(PETSC_USE_DEBUG) 5649 else if (mat->insertmode != addv) { 5650 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5651 } 5652 #endif 5653 { 5654 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5655 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5656 PetscBool roworiented = aij->roworiented; 5657 5658 /* Some Variables required in the macro */ 5659 Mat A = aij->A; 5660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5661 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5662 MatScalar *aa = a->a; 5663 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5664 Mat B = aij->B; 5665 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5666 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5667 MatScalar *ba = b->a; 5668 5669 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5670 PetscInt nonew = a->nonew; 5671 MatScalar *ap1,*ap2; 5672 5673 PetscFunctionBegin; 5674 for (i=0; i<m; i++) { 5675 if (im[i] < 0) continue; 5676 #if defined(PETSC_USE_DEBUG) 5677 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5678 #endif 5679 if (im[i] >= rstart && im[i] < rend) { 5680 row = im[i] - rstart; 5681 lastcol1 = -1; 5682 rp1 = aj + ai[row]; 5683 ap1 = aa + ai[row]; 5684 rmax1 = aimax[row]; 5685 nrow1 = ailen[row]; 5686 low1 = 0; 5687 high1 = nrow1; 5688 lastcol2 = -1; 5689 rp2 = bj + bi[row]; 5690 ap2 = ba + bi[row]; 5691 rmax2 = bimax[row]; 5692 nrow2 = bilen[row]; 5693 low2 = 0; 5694 high2 = nrow2; 5695 5696 for (j=0; j<n; j++) { 5697 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5698 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5699 if (in[j] >= cstart && in[j] < cend){ 5700 col = in[j] - cstart; 5701 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5702 } else if (in[j] < 0) continue; 5703 #if defined(PETSC_USE_DEBUG) 5704 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5705 #endif 5706 else { 5707 if (mat->was_assembled) { 5708 if (!aij->colmap) { 5709 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5710 } 5711 #if defined (PETSC_USE_CTABLE) 5712 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5713 col--; 5714 #else 5715 col = aij->colmap[in[j]] - 1; 5716 #endif 5717 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5718 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5719 col = in[j]; 5720 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5721 B = aij->B; 5722 b = (Mat_SeqAIJ*)B->data; 5723 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5724 rp2 = bj + bi[row]; 5725 ap2 = ba + bi[row]; 5726 rmax2 = bimax[row]; 5727 nrow2 = bilen[row]; 5728 low2 = 0; 5729 high2 = nrow2; 5730 bm = aij->B->rmap->n; 5731 ba = b->a; 5732 } 5733 } else col = in[j]; 5734 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5735 } 5736 } 5737 } else { 5738 if (!aij->donotstash) { 5739 if (roworiented) { 5740 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5741 } else { 5742 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5743 } 5744 } 5745 } 5746 }} 5747 PetscFunctionReturnVoid(); 5748 } 5749 EXTERN_C_END 5750 5751
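/*
   Example (an illustrative sketch, not from the PETSc documentation): from Fortran the entry point above can be called directly, bypassing the generic MatSetValues() dispatch; all arguments are passed by reference, e.g.

      call matsetvaluesmpiaij(mat,one,row,one,col,value,ADD_VALUES,ierr)

   where one, row, col, and value are Fortran variables supplied by the caller, and the external symbol (matsetvaluesmpiaij, MATSETVALUESMPIAIJ, or matsetvaluesmpiaij_) is selected by the PETSC_HAVE_FORTRAN_CAPS / PETSC_HAVE_FORTRAN_UNDERSCORE macros above.
*/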