#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; this type also
   automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
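
  A minimal calling sketch (hypothetical caller; the sequential matrix gmat need only be
  valid on rank 0, and m is the number of rows this rank is to own):

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ... later, to refresh only the numerical values from rank 0:
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);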
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 187 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 188 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 189 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 190 rowners[0] = 0; 191 for (i=2; i<=size; i++) { 192 rowners[i] += rowners[i-1]; 193 } 194 rstart = rowners[rank]; 195 rend = rowners[rank+1]; 196 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 197 if (!rank) { 198 gmata = (Mat_SeqAIJ*) gmat->data; 199 /* send row lengths to all processors */ 200 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 201 for (i=1; i<size; i++) { 202 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 203 } 204 /* determine number diagonal and off-diagonal counts */ 205 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 206 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 207 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 208 jj = 0; 209 for (i=0; i<m; i++) { 210 for (j=0; j<dlens[i]; j++) { 211 if (gmata->j[jj] < rstart) ld[i]++; 212 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 213 jj++; 214 } 215 } 216 /* send column indices to other processes */ 217 for (i=1; i<size; i++) { 218 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 219 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 220 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 223 /* send numerical values to other processes */ 224 for (i=1; i<size; i++) { 225 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 226 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 227 } 228 gmataa = gmata->a; 229 gmataj = gmata->j; 230 231 } else { 232 /* receive row lengths */ 233 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 234 /* receive column indices */ 235 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 236 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 237 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 238 /* determine number diagonal and off-diagonal counts */ 239 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 241 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 242 jj = 0; 243 for (i=0; i<m; i++) { 244 for (j=0; j<dlens[i]; j++) { 245 if (gmataj[jj] < rstart) ld[i]++; 246 if (gmataj[jj] < 
rstart || gmataj[jj] >= rend) olens[i]++; 247 jj++; 248 } 249 } 250 /* receive numerical values */ 251 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 253 } 254 /* set preallocation */ 255 for (i=0; i<m; i++) { 256 dlens[i] -= olens[i]; 257 } 258 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 259 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 260 261 for (i=0; i<m; i++) { 262 dlens[i] += olens[i]; 263 } 264 cnt = 0; 265 for (i=0; i<m; i++) { 266 row = rstart + i; 267 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 268 cnt += dlens[i]; 269 } 270 if (rank) { 271 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 272 } 273 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 274 ierr = PetscFree(rowners);CHKERRQ(ierr); 275 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 276 *inmat = mat; 277 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 278 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 279 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 280 mat = *inmat; 281 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 282 if (!rank) { 283 /* send numerical values to other processes */ 284 gmata = (Mat_SeqAIJ*) gmat->data; 285 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 286 gmataa = gmata->a; 287 for (i=1; i<size; i++) { 288 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 289 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 290 } 291 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 292 } else { 293 /* receive numerical values from process 0*/ 294 nz = Ad->nz + Ao->nz; 295 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 296 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 297 } 298 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 299 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 300 ad = Ad->a; 301 ao = Ao->a; 302 if (mat->rmap->n) { 303 i = 0; 304 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 305 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 306 } 307 for (i=1; i<mat->rmap->n; i++) { 308 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 309 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 310 } 311 i--; 312 if (mat->rmap->n) { 313 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 314 } 315 if (rank) { 316 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 317 } 318 } 319 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 CHKMEMQ; 322 PetscFunctionReturn(0); 323 } 324 325 /* 326 Local utility routine that creates a mapping from the global column 327 number to the local number in the off-diagonal part of the local 328 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 329 a slightly higher hash table cost; without it it is not scalable (each processor 330 has an order N integer array but is fast to acess. 
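
   Convention used by both variants (sketch only; gcol/lcol are illustrative names): local
   indices are stored shifted by one, colmap[gcol] = lcol+1, so that a result of 0 means
   "global column gcol has no entry in the off-diagonal part on this process".  A lookup
   therefore reads

       PetscTableFind(aij->colmap,gcol+1,&lcol);    with PETSC_USE_CTABLE
       lcol = aij->colmap[gcol];                    without PETSC_USE_CTABLE
       lcol--;                                      in either case; lcol < 0 means not present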
331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 335 { 336 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 337 PetscErrorCode ierr; 338 PetscInt n = aij->B->cmap->n,i; 339 340 PetscFunctionBegin; 341 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 342 #if defined (PETSC_USE_CTABLE) 343 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 344 for (i=0; i<n; i++){ 345 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 346 } 347 #else 348 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 349 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 352 #endif 353 PetscFunctionReturn(0); 354 } 355 356 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 357 { \ 358 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 359 lastcol1 = col;\ 360 while (high1-low1 > 5) { \ 361 t = (low1+high1)/2; \ 362 if (rp1[t] > col) high1 = t; \ 363 else low1 = t; \ 364 } \ 365 for (_i=low1; _i<high1; _i++) { \ 366 if (rp1[_i] > col) break; \ 367 if (rp1[_i] == col) { \ 368 if (addv == ADD_VALUES) ap1[_i] += value; \ 369 else ap1[_i] = value; \ 370 goto a_noinsert; \ 371 } \ 372 } \ 373 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 374 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 375 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 376 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 377 N = nrow1++ - 1; a->nz++; high1++; \ 378 /* shift up all the later entries in this row */ \ 379 for (ii=N; ii>=_i; ii--) { \ 380 rp1[ii+1] = rp1[ii]; \ 381 ap1[ii+1] = ap1[ii]; \ 382 } \ 383 rp1[_i] = col; \ 384 ap1[_i] = value; \ 385 a_noinsert: ; \ 386 ailen[row] = nrow1; \ 387 } 388 389 390 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 391 { \ 392 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 393 lastcol2 = col;\ 394 while (high2-low2 > 5) { \ 395 t = (low2+high2)/2; \ 396 if (rp2[t] > col) high2 = t; \ 397 else low2 = t; \ 398 } \ 399 for (_i=low2; _i<high2; _i++) { \ 400 if (rp2[_i] > col) break; \ 401 if (rp2[_i] == col) { \ 402 if (addv == ADD_VALUES) ap2[_i] += value; \ 403 else ap2[_i] = value; \ 404 goto b_noinsert; \ 405 } \ 406 } \ 407 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 408 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 410 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 411 N = nrow2++ - 1; b->nz++; high2++; \ 412 /* shift up all the later entries in this row */ \ 413 for (ii=N; ii>=_i; ii--) { \ 414 rp2[ii+1] = rp2[ii]; \ 415 ap2[ii+1] = ap2[ii]; \ 416 } \ 417 rp2[_i] = col; \ 418 ap2[_i] = value; \ 419 b_noinsert: ; \ 420 bilen[row] = nrow2; \ 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 425 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 426 { 427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 428 Mat_SeqAIJ *a = 
(Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 429 PetscErrorCode ierr; 430 PetscInt l,*garray = mat->garray,diag; 431 432 PetscFunctionBegin; 433 /* code only works for square matrices A */ 434 435 /* find size of row to the left of the diagonal part */ 436 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 437 row = row - diag; 438 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 439 if (garray[b->j[b->i[row]+l]] > diag) break; 440 } 441 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 442 443 /* diagonal part */ 444 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 445 446 /* right of diagonal part */ 447 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValues_MPIAIJ" 453 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 454 { 455 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 456 PetscScalar value; 457 PetscErrorCode ierr; 458 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 459 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 460 PetscBool roworiented = aij->roworiented; 461 462 /* Some Variables required in the macro */ 463 Mat A = aij->A; 464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 465 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 466 MatScalar *aa = a->a; 467 PetscBool ignorezeroentries = a->ignorezeroentries; 468 Mat B = aij->B; 469 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 470 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 471 MatScalar *ba = b->a; 472 473 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 474 PetscInt nonew; 475 MatScalar *ap1,*ap2; 476 477 PetscFunctionBegin; 478 if (v) PetscValidScalarPointer(v,6); 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_DEBUG) 482 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 lastcol1 = -1; 487 rp1 = aj + ai[row]; 488 ap1 = aa + ai[row]; 489 rmax1 = aimax[row]; 490 nrow1 = ailen[row]; 491 low1 = 0; 492 high1 = nrow1; 493 lastcol2 = -1; 494 rp2 = bj + bi[row]; 495 ap2 = ba + bi[row]; 496 rmax2 = bimax[row]; 497 nrow2 = bilen[row]; 498 low2 = 0; 499 high2 = nrow2; 500 501 for (j=0; j<n; j++) { 502 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 503 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 504 if (in[j] >= cstart && in[j] < cend){ 505 col = in[j] - cstart; 506 nonew = a->nonew; 507 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 508 } else if (in[j] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 511 #endif 512 else { 513 if (mat->was_assembled) { 514 if (!aij->colmap) { 515 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 516 } 517 #if defined (PETSC_USE_CTABLE) 518 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 519 col--; 520 #else 521 col = aij->colmap[in[j]] - 1; 522 #endif 523 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 524 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 525 col = in[j]; 526 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 527 B = aij->B; 528 b = (Mat_SeqAIJ*)B->data; 529 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 530 rp2 = bj + bi[row]; 531 ap2 = ba + bi[row]; 532 rmax2 = bimax[row]; 533 nrow2 = bilen[row]; 534 low2 = 0; 535 high2 = nrow2; 536 bm = aij->B->rmap->n; 537 ba = b->a; 538 } 539 } else col = in[j]; 540 nonew = b->nonew; 541 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 542 } 543 } 544 } else { 545 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 546 if (!aij->donotstash) { 547 mat->assembled = PETSC_FALSE; 548 if (roworiented) { 549 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 550 } else { 551 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatGetValues_MPIAIJ" 561 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 562 { 563 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 564 PetscErrorCode ierr; 565 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 566 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 567 568 PetscFunctionBegin; 569 for (i=0; i<m; i++) { 570 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 571 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 572 if (idxm[i] >= rstart && idxm[i] < rend) { 573 row = idxm[i] - rstart; 574 for (j=0; j<n; j++) { 575 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 576 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 577 if (idxn[j] >= cstart && idxn[j] < cend){ 578 col = idxn[j] - cstart; 579 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 580 } else { 581 if (!aij->colmap) { 582 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 583 } 584 #if defined (PETSC_USE_CTABLE) 585 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 586 col --; 587 #else 588 col = aij->colmap[idxn[j]] - 1; 589 #endif 590 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 591 else { 592 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 593 } 594 } 595 } 596 } else { 597 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 598 } 599 } 600 PetscFunctionReturn(0); 601 } 602 603 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 607 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 608 { 609 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 610 PetscErrorCode ierr; 611 PetscInt nstash,reallocs; 612 InsertMode addv; 613 614 PetscFunctionBegin; 615 if (aij->donotstash || mat->nooffprocentries) { 616 PetscFunctionReturn(0); 617 } 618 619 /* make sure all processors are either in 
INSERTMODE or ADDMODE */ 620 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 621 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 622 mat->insertmode = addv; /* in case this processor had no cache */ 623 624 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 625 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 626 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 632 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 633 { 634 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 635 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 636 PetscErrorCode ierr; 637 PetscMPIInt n; 638 PetscInt i,j,rstart,ncols,flg; 639 PetscInt *row,*col; 640 PetscBool other_disassembled; 641 PetscScalar *val; 642 InsertMode addv = mat->insertmode; 643 644 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 645 PetscFunctionBegin; 646 if (!aij->donotstash && !mat->nooffprocentries) { 647 while (1) { 648 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 649 if (!flg) break; 650 651 for (i=0; i<n;) { 652 /* Now identify the consecutive vals belonging to the same row */ 653 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 654 if (j < n) ncols = j-i; 655 else ncols = n-i; 656 /* Now assemble all these values with a single function call */ 657 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 658 i = j; 659 } 660 } 661 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 662 } 663 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 664 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 665 666 /* determine if any processor has disassembled, if so we must 667 also disassemble ourselfs, in order that we may reassemble. 
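
     (The reduction below combines was_assembled over all ranks with MPI_PROD: the result is
     nonzero only if every rank is still in its assembled state, so a zero value signals that
     some rank has disassembled, or never assembled, its off-diagonal block and the remaining
     ranks must follow suit before reassembling.)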
*/ 668 /* 669 if nonzero structure of submatrix B cannot change then we know that 670 no processor disassembled thus we can skip this stuff 671 */ 672 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 673 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 674 if (mat->was_assembled && !other_disassembled) { 675 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 676 } 677 } 678 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 679 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 680 } 681 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 682 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 684 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 685 686 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 687 aij->rowvalues = 0; 688 689 /* used by MatAXPY() */ 690 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 691 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 692 693 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 694 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 700 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 701 { 702 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 703 PetscErrorCode ierr; 704 705 PetscFunctionBegin; 706 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 707 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatZeroRows_MPIAIJ" 713 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 714 { 715 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 716 PetscErrorCode ierr; 717 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 718 PetscInt i,*owners = A->rmap->range; 719 PetscInt *nprocs,j,idx,nsends,row; 720 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 721 PetscInt *rvalues,count,base,slen,*source; 722 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 723 MPI_Comm comm = ((PetscObject)A)->comm; 724 MPI_Request *send_waits,*recv_waits; 725 MPI_Status recv_status,*send_status; 726 const PetscScalar *xx; 727 PetscScalar *bb; 728 #if defined(PETSC_DEBUG) 729 PetscBool found = PETSC_FALSE; 730 #endif 731 732 PetscFunctionBegin; 733 /* first count number of contributors to each processor */ 734 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 735 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 736 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 737 j = 0; 738 for (i=0; i<N; i++) { 739 if (lastidx > (idx = rows[i])) j = 0; 740 lastidx = idx; 741 for (; j<size; j++) { 742 if (idx >= owners[j] && idx < owners[j+1]) { 743 nprocs[2*j]++; 744 nprocs[2*j+1] = 1; 745 owner[i] = j; 746 #if defined(PETSC_DEBUG) 747 found = PETSC_TRUE; 748 #endif 749 break; 750 } 751 } 752 #if defined(PETSC_DEBUG) 753 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 754 found = PETSC_FALSE; 755 #endif 756 } 757 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 758 759 if (A->nooffproczerorows) { 760 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 761 nrecvs = nsends; 762 nmax = N; 763 } else 
{ 764 /* inform other processors of number of messages and max length*/ 765 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 766 } 767 768 /* post receives: */ 769 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 770 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 771 for (i=0; i<nrecvs; i++) { 772 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 773 } 774 775 /* do sends: 776 1) starts[i] gives the starting index in svalues for stuff going to 777 the ith processor 778 */ 779 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 780 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 781 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 782 starts[0] = 0; 783 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 784 for (i=0; i<N; i++) { 785 svalues[starts[owner[i]]++] = rows[i]; 786 } 787 788 starts[0] = 0; 789 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 790 count = 0; 791 for (i=0; i<size; i++) { 792 if (nprocs[2*i+1]) { 793 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 794 } 795 } 796 ierr = PetscFree(starts);CHKERRQ(ierr); 797 798 base = owners[rank]; 799 800 /* wait on receives */ 801 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 802 count = nrecvs; slen = 0; 803 while (count) { 804 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 805 /* unpack receives into our local space */ 806 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 807 source[imdex] = recv_status.MPI_SOURCE; 808 lens[imdex] = n; 809 slen += n; 810 count--; 811 } 812 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 813 814 /* move the data into the send scatter */ 815 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 816 count = 0; 817 for (i=0; i<nrecvs; i++) { 818 values = rvalues + i*nmax; 819 for (j=0; j<lens[i]; j++) { 820 lrows[count++] = values[j] - base; 821 } 822 } 823 ierr = PetscFree(rvalues);CHKERRQ(ierr); 824 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 825 ierr = PetscFree(owner);CHKERRQ(ierr); 826 ierr = PetscFree(nprocs);CHKERRQ(ierr); 827 828 /* fix right hand side if needed */ 829 if (x && b) { 830 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 831 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 832 for (i=0; i<slen; i++) { 833 bb[lrows[i]] = diag*xx[lrows[i]]; 834 } 835 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 836 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 837 } 838 /* 839 Zero the required rows. If the "diagonal block" of the matrix 840 is square and the user wishes to set the diagonal we use separate 841 code so that MatSetValues() is not called for each diagonal allocating 842 new memory, thus calling lots of mallocs and slowing things down. 
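
     Concretely, three cases are handled below: (1) diag == 0.0, where the rows of both l->A and
     l->B are simply zeroed; (2) diag != 0.0 with a square diagonal block, where MatZeroRows() on
     l->A also places diag on the diagonal; and (3) diag != 0.0 with a rectangular diagonal block,
     where the rows are zeroed and the diagonal entries are then inserted one by one with
     MatSetValues() followed by a reassembly.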
843 844 */ 845 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 846 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 847 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 848 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 849 } else if (diag != 0.0) { 850 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 851 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 852 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 853 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 854 } 855 for (i = 0; i < slen; i++) { 856 row = lrows[i] + rstart; 857 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 858 } 859 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 860 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 } else { 862 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(lrows);CHKERRQ(ierr); 865 866 /* wait on sends */ 867 if (nsends) { 868 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 869 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 870 ierr = PetscFree(send_status);CHKERRQ(ierr); 871 } 872 ierr = PetscFree(send_waits);CHKERRQ(ierr); 873 ierr = PetscFree(svalues);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 879 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 880 { 881 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 882 PetscErrorCode ierr; 883 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 884 PetscInt i,*owners = A->rmap->range; 885 PetscInt *nprocs,j,idx,nsends; 886 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 887 PetscInt *rvalues,count,base,slen,*source; 888 PetscInt *lens,*lrows,*values,m; 889 MPI_Comm comm = ((PetscObject)A)->comm; 890 MPI_Request *send_waits,*recv_waits; 891 MPI_Status recv_status,*send_status; 892 const PetscScalar *xx; 893 PetscScalar *bb,*mask; 894 Vec xmask,lmask; 895 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 896 const PetscInt *aj, *ii,*ridx; 897 PetscScalar *aa; 898 #if defined(PETSC_DEBUG) 899 PetscBool found = PETSC_FALSE; 900 #endif 901 902 PetscFunctionBegin; 903 /* first count number of contributors to each processor */ 904 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 905 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 906 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 907 j = 0; 908 for (i=0; i<N; i++) { 909 if (lastidx > (idx = rows[i])) j = 0; 910 lastidx = idx; 911 for (; j<size; j++) { 912 if (idx >= owners[j] && idx < owners[j+1]) { 913 nprocs[2*j]++; 914 nprocs[2*j+1] = 1; 915 owner[i] = j; 916 #if defined(PETSC_DEBUG) 917 found = PETSC_TRUE; 918 #endif 919 break; 920 } 921 } 922 #if defined(PETSC_DEBUG) 923 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 924 found = PETSC_FALSE; 925 #endif 926 } 927 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 928 929 /* inform other processors of number of messages and max length*/ 930 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 931 932 /* post receives: */ 933 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 934 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 935 for (i=0; i<nrecvs; i++) { 936 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 937 } 938 939 /* do sends: 940 1) starts[i] gives the starting index in svalues for stuff going to 941 the ith processor 942 */ 943 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 944 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 945 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 946 starts[0] = 0; 947 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 948 for (i=0; i<N; i++) { 949 svalues[starts[owner[i]]++] = rows[i]; 950 } 951 952 starts[0] = 0; 953 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 954 count = 0; 955 for (i=0; i<size; i++) { 956 if (nprocs[2*i+1]) { 957 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 958 } 959 } 960 ierr = PetscFree(starts);CHKERRQ(ierr); 961 962 base = owners[rank]; 963 964 /* wait on receives */ 965 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 966 count = nrecvs; slen = 0; 967 while (count) { 968 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 969 /* unpack receives into our local space */ 970 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 971 source[imdex] = recv_status.MPI_SOURCE; 972 lens[imdex] = n; 973 slen += n; 974 count--; 975 } 976 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 977 978 /* move the data into the send scatter */ 979 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 980 count = 0; 981 for (i=0; i<nrecvs; i++) { 982 values = rvalues + i*nmax; 983 for (j=0; j<lens[i]; j++) { 984 lrows[count++] = values[j] - base; 985 } 986 } 987 ierr = PetscFree(rvalues);CHKERRQ(ierr); 988 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 989 ierr = PetscFree(owner);CHKERRQ(ierr); 990 ierr = PetscFree(nprocs);CHKERRQ(ierr); 991 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 992 993 /* zero diagonal part of matrix */ 994 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 995 996 /* handle off diagonal part of matrix */ 997 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 998 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 999 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1000 for (i=0; i<slen; i++) { 1001 bb[lrows[i]] = 1; 1002 } 1003 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1007 if (x) { 1008 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1012 } 1013 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1014 1015 /* remove zeroed rows of off diagonal matrix */ 1016 ii = aij->i; 1017 for (i=0; i<slen; i++) { 1018 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1019 } 1020 1021 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1022 if (aij->compressedrow.use){ 1023 m = aij->compressedrow.nrows; 1024 ii = 
aij->compressedrow.i; 1025 ridx = aij->compressedrow.rindex; 1026 for (i=0; i<m; i++){ 1027 n = ii[i+1] - ii[i]; 1028 aj = aij->j + ii[i]; 1029 aa = aij->a + ii[i]; 1030 1031 for (j=0; j<n; j++) { 1032 if (PetscAbsScalar(mask[*aj])) { 1033 if (b) bb[*ridx] -= *aa*xx[*aj]; 1034 *aa = 0.0; 1035 } 1036 aa++; 1037 aj++; 1038 } 1039 ridx++; 1040 } 1041 } else { /* do not use compressed row format */ 1042 m = l->B->rmap->n; 1043 for (i=0; i<m; i++) { 1044 n = ii[i+1] - ii[i]; 1045 aj = aij->j + ii[i]; 1046 aa = aij->a + ii[i]; 1047 for (j=0; j<n; j++) { 1048 if (PetscAbsScalar(mask[*aj])) { 1049 if (b) bb[i] -= *aa*xx[*aj]; 1050 *aa = 0.0; 1051 } 1052 aa++; 1053 aj++; 1054 } 1055 } 1056 } 1057 if (x) { 1058 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1059 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1060 } 1061 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1062 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1063 ierr = PetscFree(lrows);CHKERRQ(ierr); 1064 1065 /* wait on sends */ 1066 if (nsends) { 1067 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1068 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1069 ierr = PetscFree(send_status);CHKERRQ(ierr); 1070 } 1071 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1072 ierr = PetscFree(svalues);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMult_MPIAIJ" 1079 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1080 { 1081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt nt; 1084 1085 PetscFunctionBegin; 1086 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1087 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1088 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1091 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1097 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1109 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1110 { 1111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1117 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1124 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1125 { 1126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 PetscBool merged; 1129 1130 PetscFunctionBegin; 1131 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1132 /* do nondiagonal part */ 1133 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1134 if (!merged) { 1135 /* 
send it on its way */ 1136 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 /* do local part */ 1138 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1139 /* receive remote parts: note this assumes the values are not actually */ 1140 /* added in yy until the next line, */ 1141 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 } else { 1143 /* do local part */ 1144 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1145 /* send it on its way */ 1146 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1147 /* values actually were received in the Begin() but we need to call this nop */ 1148 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1149 } 1150 PetscFunctionReturn(0); 1151 } 1152 1153 EXTERN_C_BEGIN 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1156 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1157 { 1158 MPI_Comm comm; 1159 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1160 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1161 IS Me,Notme; 1162 PetscErrorCode ierr; 1163 PetscInt M,N,first,last,*notme,i; 1164 PetscMPIInt size; 1165 1166 PetscFunctionBegin; 1167 1168 /* Easy test: symmetric diagonal block */ 1169 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1170 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1171 if (!*f) PetscFunctionReturn(0); 1172 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1174 if (size == 1) PetscFunctionReturn(0); 1175 1176 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1177 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1178 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1179 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1180 for (i=0; i<first; i++) notme[i] = i; 1181 for (i=last; i<M; i++) notme[i-last+first] = i; 1182 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1183 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1184 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1185 Aoff = Aoffs[0]; 1186 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1187 Boff = Boffs[0]; 1188 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1189 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1191 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1193 ierr = PetscFree(notme);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 EXTERN_C_END 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1200 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 /* do nondiagonal part */ 1207 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1208 /* send it on its way */ 1209 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 /* do local part */ 1211 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1212 /* receive remote parts */ 1213 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 
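
/*
   The multiply kernels above all share one pattern: with the parallel matrix stored as a
   "diagonal" block a->A (local columns) and an "off-diagonal" block a->B (columns indexed
   through garray), a product y = A*x is computed as

       y_local = a->A * x_local + a->B * x_ghost

   where x_ghost (a->lvec) is filled by the a->Mvctx scatter.  The scatter is begun before the
   local product and completed afterwards so that communication overlaps the a->A computation;
   the transpose variants run the same scatter in SCATTER_REVERSE with ADD_VALUES.
*/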
1216 1217 /* 1218 This only works correctly for square matrices where the subblock A->A is the 1219 diagonal block 1220 */ 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1223 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1224 { 1225 PetscErrorCode ierr; 1226 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1227 1228 PetscFunctionBegin; 1229 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1230 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1231 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatScale_MPIAIJ" 1237 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1238 { 1239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1244 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatDestroy_MPIAIJ" 1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1251 { 1252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 #if defined(PETSC_USE_LOG) 1257 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1258 #endif 1259 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1260 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1261 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1263 #if defined (PETSC_USE_CTABLE) 1264 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1265 #else 1266 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1267 #endif 1268 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1269 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1270 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1271 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1272 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1273 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1274 1275 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1292 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1293 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1294 PetscErrorCode ierr; 1295 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1296 int 
fd; 1297 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1298 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1299 PetscScalar *column_values; 1300 PetscInt message_count,flowcontrolcount; 1301 1302 PetscFunctionBegin; 1303 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1304 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1305 nz = A->nz + B->nz; 1306 if (!rank) { 1307 header[0] = MAT_FILE_CLASSID; 1308 header[1] = mat->rmap->N; 1309 header[2] = mat->cmap->N; 1310 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1311 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1312 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1313 /* get largest number of rows any processor has */ 1314 rlen = mat->rmap->n; 1315 range = mat->rmap->range; 1316 for (i=1; i<size; i++) { 1317 rlen = PetscMax(rlen,range[i+1] - range[i]); 1318 } 1319 } else { 1320 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1321 rlen = mat->rmap->n; 1322 } 1323 1324 /* load up the local row counts */ 1325 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1326 for (i=0; i<mat->rmap->n; i++) { 1327 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1328 } 1329 1330 /* store the row lengths to the file */ 1331 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1332 if (!rank) { 1333 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1334 for (i=1; i<size; i++) { 1335 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1336 rlen = range[i+1] - range[i]; 1337 ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1338 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1339 } 1340 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1343 ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1344 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1345 } 1346 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1347 1348 /* load up the local column indices */ 1349 nzmax = nz; /* )th processor needs space a largest processor needs */ 1350 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1351 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1352 cnt = 0; 1353 for (i=0; i<mat->rmap->n; i++) { 1354 for (j=B->i[i]; j<B->i[i+1]; j++) { 1355 if ( (col = garray[B->j[j]]) > cstart) break; 1356 column_indices[cnt++] = col; 1357 } 1358 for (k=A->i[i]; k<A->i[i+1]; k++) { 1359 column_indices[cnt++] = A->j[k] + cstart; 1360 } 1361 for (; j<B->i[i+1]; j++) { 1362 column_indices[cnt++] = garray[B->j[j]]; 1363 } 1364 } 1365 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1366 1367 /* store the column indices to the file */ 1368 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1369 if (!rank) { 1370 MPI_Status status; 1371 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1372 for (i=1; i<size; i++) { 1373 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1374 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1375 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1376 ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1377 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1378 } 1379 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1380 } else { 1381 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1382 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1383 ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1385 } 1386 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1387 1388 /* load up the local column values */ 1389 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1390 cnt = 0; 1391 for (i=0; i<mat->rmap->n; i++) { 1392 for (j=B->i[i]; j<B->i[i+1]; j++) { 1393 if ( garray[B->j[j]] > cstart) break; 1394 column_values[cnt++] = B->a[j]; 1395 } 1396 for (k=A->i[i]; k<A->i[i+1]; k++) { 1397 column_values[cnt++] = A->a[k]; 1398 } 1399 for (; j<B->i[i+1]; j++) { 1400 column_values[cnt++] = B->a[j]; 1401 } 1402 } 1403 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1404 1405 /* store the column values to the file */ 1406 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1407 if (!rank) { 1408 MPI_Status status; 1409 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1410 for (i=1; i<size; i++) { 1411 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1412 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1413 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1414 ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1415 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1416 } 1417 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1418 } else { 1419 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1420 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1421 ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1423 } 1424 ierr = PetscFree(column_values);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1430 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1431 { 1432 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1433 PetscErrorCode ierr; 1434 PetscMPIInt rank = aij->rank,size = aij->size; 1435 PetscBool isdraw,iascii,isbinary; 1436 PetscViewer sviewer; 1437 PetscViewerFormat format; 1438 1439 PetscFunctionBegin; 1440 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1441 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1442 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1443 if (iascii) { 1444 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1445 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1446 MatInfo info; 1447 PetscBool inodes; 1448 1449 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1450 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1451 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1452 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1453 if (!inodes) { 1454 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1455 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1456 } else { 1457 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1458 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1459 } 1460 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1462 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1464 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1467 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1470 PetscInt inodecount,inodelimit,*inodes; 1471 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1472 if (inodes) { 1473 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1474 } else { 1475 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1476 } 1477 PetscFunctionReturn(0); 1478 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1479 PetscFunctionReturn(0); 1480 } 1481 } else if (isbinary) { 1482 if (size == 1) { 1483 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1484 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1485 } else { 1486 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } else if (isdraw) { 1490 PetscDraw draw; 1491 PetscBool isnull; 1492 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1493 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1494 } 1495 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 /* assemble the entire matrix onto first processor. 
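        The gathered copy is refused for matrices with more than 1024 rows unless the option
        -mat_ascii_output_large is set (see the check just below); every rank still participates
        because the graphics synchronization further down is collective.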
*/ 1501 Mat A; 1502 Mat_SeqAIJ *Aloc; 1503 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1504 MatScalar *a; 1505 1506 if (mat->rmap->N > 1024) { 1507 PetscBool flg = PETSC_FALSE; 1508 1509 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1510 if (!flg) { 1511 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1512 } 1513 } 1514 1515 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1516 if (!rank) { 1517 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1518 } else { 1519 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1520 } 1521 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1522 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1523 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1524 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1525 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1526 1527 /* copy over the A part */ 1528 Aloc = (Mat_SeqAIJ*)aij->A->data; 1529 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1530 row = mat->rmap->rstart; 1531 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1532 for (i=0; i<m; i++) { 1533 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1534 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1535 } 1536 aj = Aloc->j; 1537 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1538 1539 /* copy over the B part */ 1540 Aloc = (Mat_SeqAIJ*)aij->B->data; 1541 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1544 ct = cols; 1545 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1546 for (i=0; i<m; i++) { 1547 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1548 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1549 } 1550 ierr = PetscFree(ct);CHKERRQ(ierr); 1551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 /* 1554 Everyone has to call to draw the matrix since the graphics waits are 1555 synchronized across all processors that share the PetscDraw object 1556 */ 1557 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1558 if (!rank) { 1559 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1560 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1561 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1562 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1563 } 1564 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1565 ierr = MatDestroy(&A);CHKERRQ(ierr); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatView_MPIAIJ" 1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1573 { 1574 PetscErrorCode ierr; 1575 PetscBool iascii,isdraw,issocket,isbinary; 1576 1577 PetscFunctionBegin; 1578 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1579 ierr = 
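/*
   Example (illustrative sketch): the per-process summary printed by the ASCII branch of
   MatView_MPIAIJ_ASCIIorDraworSocket() above is obtained by selecting the info format on a viewer before
   calling MatView(); mat is an assumed, already assembled matrix.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Full ASCII output of the entries goes through the "assemble everything on process 0" path above and is
   therefore refused beyond 1024 rows unless -mat_ascii_output_large is given.
*/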
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1580 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1581 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1582 if (iascii || isdraw || isbinary || issocket) { 1583 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1584 } else { 1585 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSOR_MPIAIJ" 1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1593 { 1594 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1595 PetscErrorCode ierr; 1596 Vec bb1 = 0; 1597 PetscBool hasop; 1598 1599 PetscFunctionBegin; 1600 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1601 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1602 } 1603 1604 if (flag == SOR_APPLY_UPPER) { 1605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1610 if (flag & SOR_ZERO_INITIAL_GUESS) { 1611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1612 its--; 1613 } 1614 1615 while (its--) { 1616 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 1619 /* update rhs: bb1 = bb - B*x */ 1620 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1621 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1622 1623 /* local sweep */ 1624 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1625 } 1626 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1627 if (flag & SOR_ZERO_INITIAL_GUESS) { 1628 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1629 its--; 1630 } 1631 while (its--) { 1632 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 1635 /* update rhs: bb1 = bb - B*x */ 1636 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1637 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1638 1639 /* local sweep */ 1640 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1641 } 1642 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1643 if (flag & SOR_ZERO_INITIAL_GUESS) { 1644 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1645 its--; 1646 } 1647 while (its--) { 1648 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 1651 /* update rhs: bb1 = bb - B*x */ 1652 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1653 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1654 1655 /* local sweep */ 1656 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1657 } 1658 } else if (flag & SOR_EISENSTAT) { 1659 Vec xx1; 1660 1661 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1662 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1663 1664 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 if (!mat->diag) { 1667 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1668 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1669 } 1670 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1671 if (hasop) { 1672 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1673 } else { 1674 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1675 } 1676 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1677 1678 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1679 1680 /* local sweep */ 1681 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1682 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1683 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1684 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1685 1686 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatPermute_MPIAIJ" 1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1693 { 1694 MPI_Comm comm; 1695 PetscInt first,local_rowsize,local_colsize; 1696 const PetscInt *rows; 1697 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1702 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1703 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1704 /* collect the global row permutation and invert it */ 1705 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1706 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1707 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1708 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1709 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1710 /* get the local target indices */ 1711 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1712 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1713 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1714 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1715 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1716 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1717 /* the column permutation is so much easier; 1718 make a local version of 'colp' and invert it */ 1719 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1720 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1721 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1722 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1723 /* now we just get the submatrix */ 1724 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1725 /* clean up */ 1726 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1727 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1734 { 1735 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1736 Mat A = mat->A,B = 
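/*
   Example (illustrative sketch): MatSOR_MPIAIJ() above is normally reached through the SOR preconditioner;
   the sweeps run on the local diagonal block A while the off-diagonal block B only enters through the
   right-hand-side update bb1 = bb - B*x.  ksp, mat, b and x are assumed to exist in the caller.

     PC pc;

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   A direct call is also possible, for instance one local symmetric sweep from a zero initial guess:

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/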
mat->B; 1737 PetscErrorCode ierr; 1738 PetscReal isend[5],irecv[5]; 1739 1740 PetscFunctionBegin; 1741 info->block_size = 1.0; 1742 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1743 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1744 isend[3] = info->memory; isend[4] = info->mallocs; 1745 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1746 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1747 isend[3] += info->memory; isend[4] += info->mallocs; 1748 if (flag == MAT_LOCAL) { 1749 info->nz_used = isend[0]; 1750 info->nz_allocated = isend[1]; 1751 info->nz_unneeded = isend[2]; 1752 info->memory = isend[3]; 1753 info->mallocs = isend[4]; 1754 } else if (flag == MAT_GLOBAL_MAX) { 1755 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1756 info->nz_used = irecv[0]; 1757 info->nz_allocated = irecv[1]; 1758 info->nz_unneeded = irecv[2]; 1759 info->memory = irecv[3]; 1760 info->mallocs = irecv[4]; 1761 } else if (flag == MAT_GLOBAL_SUM) { 1762 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1763 info->nz_used = irecv[0]; 1764 info->nz_allocated = irecv[1]; 1765 info->nz_unneeded = irecv[2]; 1766 info->memory = irecv[3]; 1767 info->mallocs = irecv[4]; 1768 } 1769 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1770 info->fill_ratio_needed = 0; 1771 info->factor_mallocs = 0; 1772 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatSetOption_MPIAIJ" 1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1779 { 1780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1781 PetscErrorCode ierr; 1782 1783 PetscFunctionBegin; 1784 switch (op) { 1785 case MAT_NEW_NONZERO_LOCATIONS: 1786 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1787 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1788 case MAT_KEEP_NONZERO_PATTERN: 1789 case MAT_NEW_NONZERO_LOCATION_ERR: 1790 case MAT_USE_INODES: 1791 case MAT_IGNORE_ZERO_ENTRIES: 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_ROW_ORIENTED: 1796 a->roworiented = flg; 1797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1798 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1799 break; 1800 case MAT_NEW_DIAGONALS: 1801 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1802 break; 1803 case MAT_IGNORE_OFF_PROC_ENTRIES: 1804 a->donotstash = flg; 1805 break; 1806 case MAT_SPD: 1807 A->spd_set = PETSC_TRUE; 1808 A->spd = flg; 1809 if (flg) { 1810 A->symmetric = PETSC_TRUE; 1811 A->structurally_symmetric = PETSC_TRUE; 1812 A->symmetric_set = PETSC_TRUE; 1813 A->structurally_symmetric_set = PETSC_TRUE; 1814 } 1815 break; 1816 case MAT_SYMMETRIC: 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_STRUCTURALLY_SYMMETRIC: 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 break; 1822 case MAT_HERMITIAN: 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_SYMMETRY_ETERNAL: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 default: 1829 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1830 } 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatGetRow_MPIAIJ" 1836 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1837 { 1838 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1839 
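/*
   Example (illustrative sketch): MatGetInfo_MPIAIJ() above sums (or maximizes) the statistics of the two
   sequential blocks, and MatSetOption_MPIAIJ() forwards most options to both blocks.  mat is an assumed,
   already assembled matrix.

     MatInfo info;

     ierr = MatSetOption(mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/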
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1840 PetscErrorCode ierr; 1841 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1842 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1843 PetscInt *cmap,*idx_p; 1844 1845 PetscFunctionBegin; 1846 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1847 mat->getrowactive = PETSC_TRUE; 1848 1849 if (!mat->rowvalues && (idx || v)) { 1850 /* 1851 allocate enough space to hold information from the longest row. 1852 */ 1853 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1854 PetscInt max = 1,tmp; 1855 for (i=0; i<matin->rmap->n; i++) { 1856 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1857 if (max < tmp) { max = tmp; } 1858 } 1859 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1860 } 1861 1862 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1863 lrow = row - rstart; 1864 1865 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1866 if (!v) {pvA = 0; pvB = 0;} 1867 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1868 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1869 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1870 nztot = nzA + nzB; 1871 1872 cmap = mat->garray; 1873 if (v || idx) { 1874 if (nztot) { 1875 /* Sort by increasing column numbers, assuming A and B already sorted */ 1876 PetscInt imark = -1; 1877 if (v) { 1878 *v = v_p = mat->rowvalues; 1879 for (i=0; i<nzB; i++) { 1880 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1881 else break; 1882 } 1883 imark = i; 1884 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1885 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1886 } 1887 if (idx) { 1888 *idx = idx_p = mat->rowindices; 1889 if (imark > -1) { 1890 for (i=0; i<imark; i++) { 1891 idx_p[i] = cmap[cworkB[i]]; 1892 } 1893 } else { 1894 for (i=0; i<nzB; i++) { 1895 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1896 else break; 1897 } 1898 imark = i; 1899 } 1900 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1901 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1902 } 1903 } else { 1904 if (idx) *idx = 0; 1905 if (v) *v = 0; 1906 } 1907 } 1908 *nz = nztot; 1909 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1910 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1917 { 1918 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1919 1920 PetscFunctionBegin; 1921 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1922 aij->getrowactive = PETSC_FALSE; 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatNorm_MPIAIJ" 1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1929 { 1930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1931 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1932 PetscErrorCode ierr; 1933 PetscInt i,j,cstart = mat->cmap->rstart; 1934 PetscReal sum = 0.0; 1935 MatScalar *v; 1936 1937 PetscFunctionBegin; 1938 if (aij->size == 1) { 1939 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1940 } else { 1941 if (type == 
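/*
   Example (illustrative sketch): MatGetRow_MPIAIJ() above merges one row of the diagonal and off-diagonal
   blocks into a single list with increasing global column numbers.  Only locally owned rows may be
   requested, and each MatGetRow() must be paired with MatRestoreRow() before asking for the next row.
   mat is an assumed assembled matrix.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (inspect cols[0..ncols-1] and vals[0..ncols-1] here)
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/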
NORM_FROBENIUS) { 1942 v = amat->a; 1943 for (i=0; i<amat->nz; i++) { 1944 #if defined(PETSC_USE_COMPLEX) 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 #else 1947 sum += (*v)*(*v); v++; 1948 #endif 1949 } 1950 v = bmat->a; 1951 for (i=0; i<bmat->nz; i++) { 1952 #if defined(PETSC_USE_COMPLEX) 1953 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1954 #else 1955 sum += (*v)*(*v); v++; 1956 #endif 1957 } 1958 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1959 *norm = PetscSqrtReal(*norm); 1960 } else if (type == NORM_1) { /* max column norm */ 1961 PetscReal *tmp,*tmp2; 1962 PetscInt *jj,*garray = aij->garray; 1963 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1965 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1966 *norm = 0.0; 1967 v = amat->a; jj = amat->j; 1968 for (j=0; j<amat->nz; j++) { 1969 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1970 } 1971 v = bmat->a; jj = bmat->j; 1972 for (j=0; j<bmat->nz; j++) { 1973 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1974 } 1975 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1976 for (j=0; j<mat->cmap->N; j++) { 1977 if (tmp2[j] > *norm) *norm = tmp2[j]; 1978 } 1979 ierr = PetscFree(tmp);CHKERRQ(ierr); 1980 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1981 } else if (type == NORM_INFINITY) { /* max row norm */ 1982 PetscReal ntemp = 0.0; 1983 for (j=0; j<aij->A->rmap->n; j++) { 1984 v = amat->a + amat->i[j]; 1985 sum = 0.0; 1986 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 v = bmat->a + bmat->i[j]; 1990 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1991 sum += PetscAbsScalar(*v); v++; 1992 } 1993 if (sum > ntemp) ntemp = sum; 1994 } 1995 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1996 } else { 1997 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1998 } 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatTranspose_MPIAIJ" 2005 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2006 { 2007 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2008 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2009 PetscErrorCode ierr; 2010 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2011 PetscInt cstart=A->cmap->rstart,ncol; 2012 Mat B; 2013 MatScalar *array; 2014 2015 PetscFunctionBegin; 2016 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2017 2018 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2019 ai = Aloc->i; aj = Aloc->j; 2020 bi = Bloc->i; bj = Bloc->j; 2021 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2022 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2023 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2024 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2025 for (i=0; i<ai[ma]; i++){ 2026 d_nnz[aj[i]] ++; 2027 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2028 } 2029 2030 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2032 ierr = 
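/*
   Example (illustrative sketch): the norms computed by MatNorm_MPIAIJ() above are NORM_1 (largest column
   sum of absolute values), NORM_FROBENIUS, and NORM_INFINITY (largest row sum); NORM_2 is not supported
   for this matrix type.  mat is an assumed assembled matrix.

     PetscReal n1,nf,ninf;

     ierr = MatNorm(mat,NORM_1,&n1);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_FROBENIUS,&nf);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&ninf);CHKERRQ(ierr);
*/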
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2033 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2034 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2035 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2036 } else { 2037 B = *matout; 2038 } 2039 2040 /* copy over the A part */ 2041 array = Aloc->a; 2042 row = A->rmap->rstart; 2043 for (i=0; i<ma; i++) { 2044 ncol = ai[i+1]-ai[i]; 2045 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2046 row++; array += ncol; aj += ncol; 2047 } 2048 aj = Aloc->j; 2049 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2050 2051 /* copy over the B part */ 2052 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2053 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2054 array = Bloc->a; 2055 row = A->rmap->rstart; 2056 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2057 cols_tmp = cols; 2058 for (i=0; i<mb; i++) { 2059 ncol = bi[i+1]-bi[i]; 2060 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2061 row++; array += ncol; cols_tmp += ncol; 2062 } 2063 ierr = PetscFree(cols);CHKERRQ(ierr); 2064 2065 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2068 *matout = B; 2069 } else { 2070 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2078 { 2079 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2080 Mat a = aij->A,b = aij->B; 2081 PetscErrorCode ierr; 2082 PetscInt s1,s2,s3; 2083 2084 PetscFunctionBegin; 2085 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2086 if (rr) { 2087 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2088 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2089 /* Overlap communication with computation. 
*/ 2090 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2091 } 2092 if (ll) { 2093 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2094 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2095 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2096 } 2097 /* scale the diagonal block */ 2098 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2099 2100 if (rr) { 2101 /* Do a scatter end and then right scale the off-diagonal block */ 2102 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2104 } 2105 2106 PetscFunctionReturn(0); 2107 } 2108 2109 #undef __FUNCT__ 2110 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 2111 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 2112 { 2113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2118 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2119 ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr); 2120 ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2125 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2126 { 2127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2132 PetscFunctionReturn(0); 2133 } 2134 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatEqual_MPIAIJ" 2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2138 { 2139 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2140 Mat a,b,c,d; 2141 PetscBool flg; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 a = matA->A; b = matA->B; 2146 c = matB->A; d = matB->B; 2147 2148 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2149 if (flg) { 2150 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2151 } 2152 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "MatCopy_MPIAIJ" 2158 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2159 { 2160 PetscErrorCode ierr; 2161 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2162 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2163 2164 PetscFunctionBegin; 2165 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2166 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2167 /* because of the column compression in the off-processor part of the matrix a->B, 2168 the number of columns in a->B and b->B may be different, hence we cannot call 2169 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2170 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2171 then copying the submatrices */ 2172 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2173 } else { 2174 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2175 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatSetUp_MPIAIJ" 2182 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2183 { 2184 PetscErrorCode ierr; 2185 2186 PetscFunctionBegin; 2187 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatAXPY_MPIAIJ" 2193 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2194 { 2195 PetscErrorCode ierr; 2196 PetscInt i; 2197 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2198 PetscBLASInt bnz,one=1; 2199 Mat_SeqAIJ *x,*y; 2200 2201 PetscFunctionBegin; 2202 if (str == SAME_NONZERO_PATTERN) { 2203 PetscScalar alpha = a; 2204 x = (Mat_SeqAIJ *)xx->A->data; 2205 y = (Mat_SeqAIJ *)yy->A->data; 2206 bnz = PetscBLASIntCast(x->nz); 2207 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2208 x = (Mat_SeqAIJ *)xx->B->data; 2209 y = (Mat_SeqAIJ *)yy->B->data; 2210 bnz = PetscBLASIntCast(x->nz); 2211 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2212 } else if (str == SUBSET_NONZERO_PATTERN) { 2213 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2214 2215 x = (Mat_SeqAIJ *)xx->B->data; 2216 y = (Mat_SeqAIJ *)yy->B->data; 2217 if (y->xtoy && y->XtoY != xx->B) { 2218 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2219 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2220 } 2221 if (!y->xtoy) { /* get xtoy */ 2222 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2223 y->XtoY = xx->B; 2224 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2225 } 2226 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2227 } else { 2228 Mat B; 2229 PetscInt *nnz_d,*nnz_o; 2230 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2231 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2232 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2233 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2234 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2235 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2236 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2237 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2238 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2239 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2240 ierr = MatHeaderReplace(Y,B); 2241 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2242 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2243 } 2244 PetscFunctionReturn(0); 2245 } 2246 2247 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2248 2249 #undef __FUNCT__ 2250 #define __FUNCT__ "MatConjugate_MPIAIJ" 2251 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2252 { 2253 #if defined(PETSC_USE_COMPLEX) 2254 PetscErrorCode ierr; 2255 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2256 2257 PetscFunctionBegin; 2258 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2259 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2260 #else 2261 PetscFunctionBegin; 2262 #endif 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 
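/*
   Example (illustrative sketch): MatCopy_MPIAIJ() and MatAXPY_MPIAIJ() above take a fast block-wise path
   only when the two matrices share the nonzero pattern; otherwise they fall back to the generic routines
   (MatCopy_Basic(), or rebuilding Y with fresh preallocation).  X, Y and alpha are assumed to exist.

     ierr = MatCopy(X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   Pass DIFFERENT_NONZERO_PATTERN (or SUBSET_NONZERO_PATTERN for MatAXPY) when the patterns do not match.
*/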
#define __FUNCT__ "MatRealPart_MPIAIJ" 2268 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2269 { 2270 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2271 PetscErrorCode ierr; 2272 2273 PetscFunctionBegin; 2274 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2275 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2276 PetscFunctionReturn(0); 2277 } 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2281 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2282 { 2283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2288 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #ifdef PETSC_HAVE_PBGL 2293 2294 #include <boost/parallel/mpi/bsp_process_group.hpp> 2295 #include <boost/graph/distributed/ilu_default_graph.hpp> 2296 #include <boost/graph/distributed/ilu_0_block.hpp> 2297 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2298 #include <boost/graph/distributed/petsc/interface.hpp> 2299 #include <boost/multi_array.hpp> 2300 #include <boost/parallel/distributed_property_map->hpp> 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2304 /* 2305 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2306 */ 2307 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2308 { 2309 namespace petsc = boost::distributed::petsc; 2310 2311 namespace graph_dist = boost::graph::distributed; 2312 using boost::graph::distributed::ilu_default::process_group_type; 2313 using boost::graph::ilu_permuted; 2314 2315 PetscBool row_identity, col_identity; 2316 PetscContainer c; 2317 PetscInt m, n, M, N; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2322 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2323 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2324 if (!row_identity || !col_identity) { 2325 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2326 } 2327 2328 process_group_type pg; 2329 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2330 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2331 lgraph_type& level_graph = *lgraph_p; 2332 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2333 2334 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2335 ilu_permuted(level_graph); 2336 2337 /* put together the new matrix */ 2338 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2339 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2340 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2341 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2342 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2343 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2345 2346 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2347 ierr = PetscContainerSetPointer(c, lgraph_p); 2348 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2349 ierr = PetscContainerDestroy(&c); 2350 PetscFunctionReturn(0); 2351 } 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2355 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2356 
{ 2357 PetscFunctionBegin; 2358 PetscFunctionReturn(0); 2359 } 2360 2361 #undef __FUNCT__ 2362 #define __FUNCT__ "MatSolve_MPIAIJ" 2363 /* 2364 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2365 */ 2366 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2367 { 2368 namespace graph_dist = boost::graph::distributed; 2369 2370 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2371 lgraph_type* lgraph_p; 2372 PetscContainer c; 2373 PetscErrorCode ierr; 2374 2375 PetscFunctionBegin; 2376 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2377 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2378 ierr = VecCopy(b, x);CHKERRQ(ierr); 2379 2380 PetscScalar* array_x; 2381 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2382 PetscInt sx; 2383 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2384 2385 PetscScalar* array_b; 2386 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2387 PetscInt sb; 2388 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2389 2390 lgraph_type& level_graph = *lgraph_p; 2391 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2392 2393 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2394 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2395 ref_x(array_x, boost::extents[num_vertices(graph)]); 2396 2397 typedef boost::iterator_property_map<array_ref_type::iterator, 2398 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2399 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2400 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2401 2402 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2403 2404 PetscFunctionReturn(0); 2405 } 2406 #endif 2407 2408 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2409 PetscInt nzlocal,nsends,nrecvs; 2410 PetscMPIInt *send_rank,*recv_rank; 2411 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2412 PetscScalar *sbuf_a,**rbuf_a; 2413 PetscErrorCode (*Destroy)(Mat); 2414 } Mat_Redundant; 2415 2416 #undef __FUNCT__ 2417 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2418 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2419 { 2420 PetscErrorCode ierr; 2421 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2422 PetscInt i; 2423 2424 PetscFunctionBegin; 2425 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2426 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2427 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2428 for (i=0; i<redund->nrecvs; i++){ 2429 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2430 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2431 } 2432 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2433 ierr = PetscFree(redund);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "MatDestroy_MatRedundant" 2439 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2440 { 2441 PetscErrorCode ierr; 2442 PetscContainer container; 2443 Mat_Redundant *redund=PETSC_NULL; 2444 2445 PetscFunctionBegin; 2446 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2447 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2448 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2449 A->ops->destroy = redund->Destroy; 2450 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 
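/*
   Example (illustrative sketch): the Mat_Redundant bookkeeping used by MatGetRedundantMatrix_MPIAIJ()
   below is attached to the matrix with a PetscContainer so it survives between MAT_INITIAL_MATRIX and
   MAT_REUSE_MATRIX calls; PetscContainerDestroy_MatRedundant() above is its destructor.  The same pattern
   works for any user data.  MyCtx and MyCtxDestroy are hypothetical names: MyCtx is a user struct and
   MyCtxDestroy a user routine matching PetscErrorCode (*)(void*).

     MyCtx          *ctx;
     PetscContainer container;

     ierr = PetscNew(MyCtx,&ctx);CHKERRQ(ierr);
     ierr = PetscContainerCreate(((PetscObject)mat)->comm,&container);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(container,ctx);CHKERRQ(ierr);
     ierr = PetscContainerSetUserDestroy(container,MyCtxDestroy);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)mat,"MyCtx",(PetscObject)container);CHKERRQ(ierr);
     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

   PetscObjectQuery() recovers the container later, exactly as MatDestroy_MatRedundant() does below.
*/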
2451 if (A->ops->destroy) { 2452 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2453 } 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2459 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2460 { 2461 PetscMPIInt rank,size; 2462 MPI_Comm comm=((PetscObject)mat)->comm; 2463 PetscErrorCode ierr; 2464 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2465 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2466 PetscInt *rowrange=mat->rmap->range; 2467 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2468 Mat A=aij->A,B=aij->B,C=*matredundant; 2469 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2470 PetscScalar *sbuf_a; 2471 PetscInt nzlocal=a->nz+b->nz; 2472 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2473 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2474 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2475 MatScalar *aworkA,*aworkB; 2476 PetscScalar *vals; 2477 PetscMPIInt tag1,tag2,tag3,imdex; 2478 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2479 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2480 MPI_Status recv_status,*send_status; 2481 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2482 PetscInt **rbuf_j=PETSC_NULL; 2483 PetscScalar **rbuf_a=PETSC_NULL; 2484 Mat_Redundant *redund=PETSC_NULL; 2485 PetscContainer container; 2486 2487 PetscFunctionBegin; 2488 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2489 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2490 2491 if (reuse == MAT_REUSE_MATRIX) { 2492 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2493 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2494 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2495 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2496 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2497 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2498 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2499 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2500 2501 nsends = redund->nsends; 2502 nrecvs = redund->nrecvs; 2503 send_rank = redund->send_rank; 2504 recv_rank = redund->recv_rank; 2505 sbuf_nz = redund->sbuf_nz; 2506 rbuf_nz = redund->rbuf_nz; 2507 sbuf_j = redund->sbuf_j; 2508 sbuf_a = redund->sbuf_a; 2509 rbuf_j = redund->rbuf_j; 2510 rbuf_a = redund->rbuf_a; 2511 } 2512 2513 if (reuse == MAT_INITIAL_MATRIX){ 2514 PetscMPIInt subrank,subsize; 2515 PetscInt nleftover,np_subcomm; 2516 /* get the destination processors' id send_rank, nsends and nrecvs */ 2517 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2518 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2519 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2520 np_subcomm = size/nsubcomm; 2521 nleftover = size - nsubcomm*np_subcomm; 2522 nsends = 0; nrecvs = 0; 2523 for (i=0; i<size; i++){ /* i=rank*/ 2524 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2525 send_rank[nsends] = i; nsends++; 2526 recv_rank[nrecvs++] = i; 2527 } 2528 } 2529 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2530 i = size-nleftover-1; 2531 j = 0; 2532 while (j < nsubcomm - nleftover){ 2533 send_rank[nsends++] = i; 2534 i--; j++; 2535 } 2536 } 2537 2538 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2539 for (i=0; i<nleftover; i++){ 2540 recv_rank[nrecvs++] = size-nleftover+i; 2541 } 2542 } 2543 2544 /* allocate sbuf_j, sbuf_a */ 2545 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2546 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2547 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2548 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2549 2550 /* copy mat's local entries into the buffers */ 2551 if (reuse == MAT_INITIAL_MATRIX){ 2552 rownz_max = 0; 2553 rptr = sbuf_j; 2554 cols = sbuf_j + rend-rstart + 1; 2555 vals = sbuf_a; 2556 rptr[0] = 0; 2557 for (i=0; i<rend-rstart; i++){ 2558 row = i + rstart; 2559 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2560 ncols = nzA + nzB; 2561 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2562 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2563 /* load the column indices for this row into cols */ 2564 lwrite = 0; 2565 for (l=0; l<nzB; l++) { 2566 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2567 vals[lwrite] = aworkB[l]; 2568 cols[lwrite++] = ctmp; 2569 } 2570 } 2571 for (l=0; l<nzA; l++){ 2572 vals[lwrite] = aworkA[l]; 2573 cols[lwrite++] = cstart + cworkA[l]; 2574 } 2575 for (l=0; l<nzB; l++) { 2576 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2577 vals[lwrite] = aworkB[l]; 2578 cols[lwrite++] = ctmp; 2579 } 2580 } 2581 vals += ncols; 2582 cols += ncols; 2583 rptr[i+1] = rptr[i] + ncols; 2584 if (rownz_max < ncols) rownz_max = ncols; 2585 } 2586 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2587 } else { /* only copy matrix values into sbuf_a */ 2588 rptr = sbuf_j; 2589 vals = sbuf_a; 2590 rptr[0] = 0; 2591 for (i=0; i<rend-rstart; i++){ 2592 row = i + rstart; 2593 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2594 ncols = nzA + nzB; 2595 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2596 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2597 lwrite = 0; 2598 for (l=0; l<nzB; l++) { 2599 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2600 } 2601 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2602 for 
(l=0; l<nzB; l++) { 2603 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2604 } 2605 vals += ncols; 2606 rptr[i+1] = rptr[i] + ncols; 2607 } 2608 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2609 2610 /* send nzlocal to others, and recv other's nzlocal */ 2611 /*--------------------------------------------------*/ 2612 if (reuse == MAT_INITIAL_MATRIX){ 2613 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2614 s_waits2 = s_waits3 + nsends; 2615 s_waits1 = s_waits2 + nsends; 2616 r_waits1 = s_waits1 + nsends; 2617 r_waits2 = r_waits1 + nrecvs; 2618 r_waits3 = r_waits2 + nrecvs; 2619 } else { 2620 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2621 r_waits3 = s_waits3 + nsends; 2622 } 2623 2624 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2625 if (reuse == MAT_INITIAL_MATRIX){ 2626 /* get new tags to keep the communication clean */ 2627 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2628 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2629 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2630 2631 /* post receives of other's nzlocal */ 2632 for (i=0; i<nrecvs; i++){ 2633 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2634 } 2635 /* send nzlocal to others */ 2636 for (i=0; i<nsends; i++){ 2637 sbuf_nz[i] = nzlocal; 2638 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2639 } 2640 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2641 count = nrecvs; 2642 while (count) { 2643 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2644 recv_rank[imdex] = recv_status.MPI_SOURCE; 2645 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2646 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2647 2648 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2649 rbuf_nz[imdex] += i + 2; 2650 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2651 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2652 count--; 2653 } 2654 /* wait on sends of nzlocal */ 2655 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2656 /* send mat->i,j to others, and recv from other's */ 2657 /*------------------------------------------------*/ 2658 for (i=0; i<nsends; i++){ 2659 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2660 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2661 } 2662 /* wait on receives of mat->i,j */ 2663 /*------------------------------*/ 2664 count = nrecvs; 2665 while (count) { 2666 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2667 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2668 count--; 2669 } 2670 /* wait on sends of mat->i,j */ 2671 /*---------------------------*/ 2672 if (nsends) { 2673 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2674 } 2675 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2676 2677 /* post receives, send and receive mat->a */ 2678 
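/*
   Communication layout used here (added explanatory note): each contributing process packs its rows as
     sbuf_j = [ row pointers (rend-rstart+1 entries) | global column indices (nzlocal entries) ]
     sbuf_a = [ values (nzlocal entries) ]
   and three message rounds are used, one tag each: tag1 carries nzlocal so receivers can size their
   buffers, tag2 carries sbuf_j, and tag3 (below) carries sbuf_a.  When the matrix is reused, only the
   tag3 value round is repeated, since the index pattern is cached in Mat_Redundant.
*/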
/*----------------------------------------*/ 2679 for (imdex=0; imdex<nrecvs; imdex++) { 2680 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2681 } 2682 for (i=0; i<nsends; i++){ 2683 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2684 } 2685 count = nrecvs; 2686 while (count) { 2687 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2688 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2689 count--; 2690 } 2691 if (nsends) { 2692 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2693 } 2694 2695 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2696 2697 /* create redundant matrix */ 2698 /*-------------------------*/ 2699 if (reuse == MAT_INITIAL_MATRIX){ 2700 /* compute rownz_max for preallocation */ 2701 for (imdex=0; imdex<nrecvs; imdex++){ 2702 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2703 rptr = rbuf_j[imdex]; 2704 for (i=0; i<j; i++){ 2705 ncols = rptr[i+1] - rptr[i]; 2706 if (rownz_max < ncols) rownz_max = ncols; 2707 } 2708 } 2709 2710 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2711 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2712 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2713 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2714 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2715 } else { 2716 C = *matredundant; 2717 } 2718 2719 /* insert local matrix entries */ 2720 rptr = sbuf_j; 2721 cols = sbuf_j + rend-rstart + 1; 2722 vals = sbuf_a; 2723 for (i=0; i<rend-rstart; i++){ 2724 row = i + rstart; 2725 ncols = rptr[i+1] - rptr[i]; 2726 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2727 vals += ncols; 2728 cols += ncols; 2729 } 2730 /* insert received matrix entries */ 2731 for (imdex=0; imdex<nrecvs; imdex++){ 2732 rstart = rowrange[recv_rank[imdex]]; 2733 rend = rowrange[recv_rank[imdex]+1]; 2734 rptr = rbuf_j[imdex]; 2735 cols = rbuf_j[imdex] + rend-rstart + 1; 2736 vals = rbuf_a[imdex]; 2737 for (i=0; i<rend-rstart; i++){ 2738 row = i + rstart; 2739 ncols = rptr[i+1] - rptr[i]; 2740 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2741 vals += ncols; 2742 cols += ncols; 2743 } 2744 } 2745 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2748 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2749 if (reuse == MAT_INITIAL_MATRIX) { 2750 PetscContainer container; 2751 *matredundant = C; 2752 /* create a supporting struct and attach it to C for reuse */ 2753 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2754 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2755 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2756 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2757 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2758 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2759 2760 redund->nzlocal = nzlocal; 2761 redund->nsends = nsends; 2762 redund->nrecvs = nrecvs; 2763 
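/*
   Example (illustrative sketch): this routine is reached through the MatGetRedundantMatrix() interface.
   With nsubcomm equal to the number of processes and PETSC_COMM_SELF as the sub-communicator, every
   process ends up with its own copy of the whole matrix; size and M (the global row count) are assumed
   to be set by the caller.

     Mat Cred;

     ierr = MatGetRedundantMatrix(mat,size,PETSC_COMM_SELF,M,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);

   Later calls with MAT_REUSE_MATRIX and the same Cred skip the index exchange and only re-send the
   values, using the buffers kept in the Mat_Redundant container; this is what PCREDUNDANT relies on.
*/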
redund->send_rank = send_rank; 2764 redund->recv_rank = recv_rank; 2765 redund->sbuf_nz = sbuf_nz; 2766 redund->rbuf_nz = rbuf_nz; 2767 redund->sbuf_j = sbuf_j; 2768 redund->sbuf_a = sbuf_a; 2769 redund->rbuf_j = rbuf_j; 2770 redund->rbuf_a = rbuf_a; 2771 2772 redund->Destroy = C->ops->destroy; 2773 C->ops->destroy = MatDestroy_MatRedundant; 2774 } 2775 PetscFunctionReturn(0); 2776 } 2777 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2780 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2781 { 2782 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2783 PetscErrorCode ierr; 2784 PetscInt i,*idxb = 0; 2785 PetscScalar *va,*vb; 2786 Vec vtmp; 2787 2788 PetscFunctionBegin; 2789 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2790 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2791 if (idx) { 2792 for (i=0; i<A->rmap->n; i++) { 2793 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2794 } 2795 } 2796 2797 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2798 if (idx) { 2799 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2800 } 2801 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2802 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2803 2804 for (i=0; i<A->rmap->n; i++){ 2805 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2806 va[i] = vb[i]; 2807 if (idx) idx[i] = a->garray[idxb[i]]; 2808 } 2809 } 2810 2811 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2812 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2813 ierr = PetscFree(idxb);CHKERRQ(ierr); 2814 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2815 PetscFunctionReturn(0); 2816 } 2817 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2820 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2821 { 2822 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2823 PetscErrorCode ierr; 2824 PetscInt i,*idxb = 0; 2825 PetscScalar *va,*vb; 2826 Vec vtmp; 2827 2828 PetscFunctionBegin; 2829 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2830 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2831 if (idx) { 2832 for (i=0; i<A->cmap->n; i++) { 2833 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2834 } 2835 } 2836 2837 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2838 if (idx) { 2839 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2840 } 2841 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2842 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2843 2844 for (i=0; i<A->rmap->n; i++){ 2845 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2846 va[i] = vb[i]; 2847 if (idx) idx[i] = a->garray[idxb[i]]; 2848 } 2849 } 2850 2851 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2852 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2853 ierr = PetscFree(idxb);CHKERRQ(ierr); 2854 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2860 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2861 { 2862 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2863 PetscInt n = A->rmap->n; 2864 PetscInt cstart = A->cmap->rstart; 2865 PetscInt *cmap = mat->garray; 2866 PetscInt *diagIdx, *offdiagIdx; 2867 Vec diagV, offdiagV; 2868 PetscScalar *a, *diagA, *offdiagA; 2869 PetscInt r; 2870 PetscErrorCode ierr; 2871 2872 PetscFunctionBegin; 2873 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2874 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2875 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
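/*
   Example (illustrative sketch): MatGetRowMaxAbs_MPIAIJ() above combines the row maxima of the diagonal
   and off-diagonal blocks and translates the winning position back to a global column index through
   garray.  mat is assumed assembled; the index array may be PETSC_NULL if only the values are needed.

     Vec      vmax;
     PetscInt m,*cidx;

     ierr = MatGetLocalSize(mat,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetVecs(mat,PETSC_NULL,&vmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&cidx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(mat,vmax,cidx);CHKERRQ(ierr);
     ierr = PetscFree(cidx);CHKERRQ(ierr);
     ierr = VecDestroy(&vmax);CHKERRQ(ierr);

   MatGetRowMinAbs(), MatGetRowMin() and MatGetRowMax() follow the same calling sequence.
*/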
&offdiagV);CHKERRQ(ierr); 2876 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2877 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2878 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2879 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2880 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2881 for(r = 0; r < n; ++r) { 2882 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2883 a[r] = diagA[r]; 2884 idx[r] = cstart + diagIdx[r]; 2885 } else { 2886 a[r] = offdiagA[r]; 2887 idx[r] = cmap[offdiagIdx[r]]; 2888 } 2889 } 2890 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2891 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2892 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2893 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2894 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2895 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2896 PetscFunctionReturn(0); 2897 } 2898 2899 #undef __FUNCT__ 2900 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2901 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2902 { 2903 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2904 PetscInt n = A->rmap->n; 2905 PetscInt cstart = A->cmap->rstart; 2906 PetscInt *cmap = mat->garray; 2907 PetscInt *diagIdx, *offdiagIdx; 2908 Vec diagV, offdiagV; 2909 PetscScalar *a, *diagA, *offdiagA; 2910 PetscInt r; 2911 PetscErrorCode ierr; 2912 2913 PetscFunctionBegin; 2914 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2915 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2916 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2917 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2918 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2919 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2920 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2921 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2922 for(r = 0; r < n; ++r) { 2923 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2924 a[r] = diagA[r]; 2925 idx[r] = cstart + diagIdx[r]; 2926 } else { 2927 a[r] = offdiagA[r]; 2928 idx[r] = cmap[offdiagIdx[r]]; 2929 } 2930 } 2931 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2932 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2933 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2934 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2935 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2936 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2942 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2943 { 2944 PetscErrorCode ierr; 2945 Mat *dummy; 2946 2947 PetscFunctionBegin; 2948 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2949 *newmat = *dummy; 2950 ierr = PetscFree(dummy);CHKERRQ(ierr); 2951 PetscFunctionReturn(0); 2952 } 2953 2954 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2955 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2958 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2959 { 2960 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2961 PetscErrorCode ierr; 2962 2963 PetscFunctionBegin; 2964 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2965 PetscFunctionReturn(0); 2966 } 2967 2968 2969 /* -------------------------------------------------------------------*/ 2970 static 
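/*
   Note (added): the table that follows is the MATMPIAIJ method table.  The generic Mat interface simply
   dispatches through it, so a call such as

     ierr = MatMult(A,x,y);CHKERRQ(ierr);

   reduces, after argument checking and event logging, to (*A->ops->mult)(A,x,y), i.e. MatMult_MPIAIJ()
   for this type.  A zero slot means MATMPIAIJ provides no specialized implementation for that operation.
*/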
struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2971 MatGetRow_MPIAIJ, 2972 MatRestoreRow_MPIAIJ, 2973 MatMult_MPIAIJ, 2974 /* 4*/ MatMultAdd_MPIAIJ, 2975 MatMultTranspose_MPIAIJ, 2976 MatMultTransposeAdd_MPIAIJ, 2977 #ifdef PETSC_HAVE_PBGL 2978 MatSolve_MPIAIJ, 2979 #else 2980 0, 2981 #endif 2982 0, 2983 0, 2984 /*10*/ 0, 2985 0, 2986 0, 2987 MatSOR_MPIAIJ, 2988 MatTranspose_MPIAIJ, 2989 /*15*/ MatGetInfo_MPIAIJ, 2990 MatEqual_MPIAIJ, 2991 MatGetDiagonal_MPIAIJ, 2992 MatDiagonalScale_MPIAIJ, 2993 MatNorm_MPIAIJ, 2994 /*20*/ MatAssemblyBegin_MPIAIJ, 2995 MatAssemblyEnd_MPIAIJ, 2996 MatSetOption_MPIAIJ, 2997 MatZeroEntries_MPIAIJ, 2998 /*24*/ MatZeroRows_MPIAIJ, 2999 0, 3000 #ifdef PETSC_HAVE_PBGL 3001 0, 3002 #else 3003 0, 3004 #endif 3005 0, 3006 0, 3007 /*29*/ MatSetUp_MPIAIJ, 3008 #ifdef PETSC_HAVE_PBGL 3009 0, 3010 #else 3011 0, 3012 #endif 3013 0, 3014 0, 3015 0, 3016 /*34*/ MatDuplicate_MPIAIJ, 3017 0, 3018 0, 3019 0, 3020 0, 3021 /*39*/ MatAXPY_MPIAIJ, 3022 MatGetSubMatrices_MPIAIJ, 3023 MatIncreaseOverlap_MPIAIJ, 3024 MatGetValues_MPIAIJ, 3025 MatCopy_MPIAIJ, 3026 /*44*/ MatGetRowMax_MPIAIJ, 3027 MatScale_MPIAIJ, 3028 0, 3029 0, 3030 MatZeroRowsColumns_MPIAIJ, 3031 /*49*/ MatSetBlockSize_MPIAIJ, 3032 0, 3033 0, 3034 0, 3035 0, 3036 /*54*/ MatFDColoringCreate_MPIAIJ, 3037 0, 3038 MatSetUnfactored_MPIAIJ, 3039 MatPermute_MPIAIJ, 3040 0, 3041 /*59*/ MatGetSubMatrix_MPIAIJ, 3042 MatDestroy_MPIAIJ, 3043 MatView_MPIAIJ, 3044 0, 3045 0, 3046 /*64*/ 0, 3047 0, 3048 0, 3049 0, 3050 0, 3051 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3052 MatGetRowMinAbs_MPIAIJ, 3053 0, 3054 MatSetColoring_MPIAIJ, 3055 #if defined(PETSC_HAVE_ADIC) 3056 MatSetValuesAdic_MPIAIJ, 3057 #else 3058 0, 3059 #endif 3060 MatSetValuesAdifor_MPIAIJ, 3061 /*75*/ MatFDColoringApply_AIJ, 3062 0, 3063 0, 3064 0, 3065 0, 3066 /*80*/ 0, 3067 0, 3068 0, 3069 /*83*/ MatLoad_MPIAIJ, 3070 0, 3071 0, 3072 0, 3073 0, 3074 0, 3075 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3076 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3077 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3078 MatPtAP_Basic, 3079 MatPtAPSymbolic_MPIAIJ, 3080 /*94*/ MatPtAPNumeric_MPIAIJ, 3081 0, 3082 0, 3083 0, 3084 0, 3085 /*99*/ 0, 3086 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3087 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3088 MatConjugate_MPIAIJ, 3089 0, 3090 /*104*/MatSetValuesRow_MPIAIJ, 3091 MatRealPart_MPIAIJ, 3092 MatImaginaryPart_MPIAIJ, 3093 0, 3094 0, 3095 /*109*/0, 3096 MatGetRedundantMatrix_MPIAIJ, 3097 MatGetRowMin_MPIAIJ, 3098 0, 3099 0, 3100 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3101 0, 3102 0, 3103 0, 3104 0, 3105 /*119*/0, 3106 0, 3107 0, 3108 0, 3109 MatGetMultiProcBlock_MPIAIJ, 3110 /*124*/MatFindNonZeroRows_MPIAIJ, 3111 MatGetColumnNorms_MPIAIJ, 3112 MatInvertBlockDiagonal_MPIAIJ, 3113 0, 3114 MatGetSubMatricesParallel_MPIAIJ, 3115 /*129*/0, 3116 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3117 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3118 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3119 0, 3120 /*134*/0, 3121 0, 3122 0, 3123 0, 3124 0 3125 }; 3126 3127 /* ----------------------------------------------------------------------------------------*/ 3128 3129 EXTERN_C_BEGIN 3130 #undef __FUNCT__ 3131 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3132 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3133 { 3134 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3135 PetscErrorCode ierr; 3136 3137 PetscFunctionBegin; 3138 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3139 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 EXTERN_C_END 3143 3144 EXTERN_C_BEGIN 3145 #undef __FUNCT__ 3146 #define 
__FUNCT__ "MatRetrieveValues_MPIAIJ" 3147 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3148 { 3149 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3150 PetscErrorCode ierr; 3151 3152 PetscFunctionBegin; 3153 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3154 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3155 PetscFunctionReturn(0); 3156 } 3157 EXTERN_C_END 3158 3159 EXTERN_C_BEGIN 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3162 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3163 { 3164 Mat_MPIAIJ *b; 3165 PetscErrorCode ierr; 3166 PetscInt i; 3167 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3168 3169 PetscFunctionBegin; 3170 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3171 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3172 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3173 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3174 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3175 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3176 3177 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3178 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3179 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3180 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3181 if (d_nnz) { 3182 for (i=0; i<B->rmap->n; i++) { 3183 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3184 } 3185 } 3186 if (o_nnz) { 3187 for (i=0; i<B->rmap->n; i++) { 3188 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3189 } 3190 } 3191 b = (Mat_MPIAIJ*)B->data; 3192 3193 if (!B->preallocated) { 3194 /* Explicitly create 2 MATSEQAIJ matrices. */ 3195 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3196 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3197 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3198 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3199 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3200 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3201 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3202 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3203 } 3204 3205 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3206 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3207 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3208 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3209 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3210 B->preallocated = PETSC_TRUE; 3211 PetscFunctionReturn(0); 3212 } 3213 EXTERN_C_END 3214 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3217 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3218 { 3219 Mat mat; 3220 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3221 PetscErrorCode ierr; 3222 3223 PetscFunctionBegin; 3224 *newmat = 0; 3225 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3226 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3227 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3228 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3229 a = (Mat_MPIAIJ*)mat->data; 3230 3231 mat->factortype = matin->factortype; 3232 mat->rmap->bs = matin->rmap->bs; 3233 mat->assembled = PETSC_TRUE; 3234 mat->insertmode = NOT_SET_VALUES; 3235 mat->preallocated = PETSC_TRUE; 3236 3237 a->size = oldmat->size; 3238 a->rank = oldmat->rank; 3239 a->donotstash = oldmat->donotstash; 3240 a->roworiented = oldmat->roworiented; 3241 a->rowindices = 0; 3242 a->rowvalues = 0; 3243 a->getrowactive = PETSC_FALSE; 3244 3245 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3246 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3247 3248 if (oldmat->colmap) { 3249 #if defined (PETSC_USE_CTABLE) 3250 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3251 #else 3252 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3253 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3254 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3255 #endif 3256 } else a->colmap = 0; 3257 if (oldmat->garray) { 3258 PetscInt len; 3259 len = oldmat->B->cmap->n; 3260 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3261 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3262 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3263 } else a->garray = 0; 3264 3265 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3266 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3267 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3268 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3269 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3270 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3271 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3272 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3273 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3274 *newmat = mat; 3275 PetscFunctionReturn(0); 3276 } 3277 3278 3279 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "MatLoad_MPIAIJ" 3282 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3283 { 3284 PetscScalar *vals,*svals; 3285 MPI_Comm comm = ((PetscObject)viewer)->comm; 3286 PetscErrorCode ierr; 3287 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3288 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3289 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3290 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = 
PETSC_NULL,jj,*mycols,*smycols; 3291 PetscInt cend,cstart,n,*rowners,sizesset=1; 3292 int fd; 3293 3294 PetscFunctionBegin; 3295 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3296 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3297 if (!rank) { 3298 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3299 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3300 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3301 } 3302 3303 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3304 3305 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3306 M = header[1]; N = header[2]; 3307 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3308 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3309 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3310 3311 /* If global sizes are set, check if they are consistent with that given in the file */ 3312 if (sizesset) { 3313 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3314 } 3315 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3316 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3317 3318 /* determine ownership of all rows */ 3319 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3320 else m = newMat->rmap->n; /* Set by user */ 3321 3322 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3323 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3324 3325 /* First process needs enough room for process with most rows */ 3326 if (!rank) { 3327 mmax = rowners[1]; 3328 for (i=2; i<size; i++) { 3329 mmax = PetscMax(mmax,rowners[i]); 3330 } 3331 } else mmax = m; 3332 3333 rowners[0] = 0; 3334 for (i=2; i<=size; i++) { 3335 rowners[i] += rowners[i-1]; 3336 } 3337 rstart = rowners[rank]; 3338 rend = rowners[rank+1]; 3339 3340 /* distribute row lengths to all processors */ 3341 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3342 if (!rank) { 3343 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3344 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3345 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3346 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3347 for (j=0; j<m; j++) { 3348 procsnz[0] += ourlens[j]; 3349 } 3350 for (i=1; i<size; i++) { 3351 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3352 /* calculate the number of nonzeros on each processor */ 3353 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3354 procsnz[i] += rowlengths[j]; 3355 } 3356 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3357 } 3358 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3359 } else { 3360 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3361 } 3362 3363 if (!rank) { 3364 /* determine max buffer needed and allocate it */ 3365 maxnz = 0; 3366 for (i=0; i<size; i++) { 3367 maxnz = PetscMax(maxnz,procsnz[i]); 3368 } 3369 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3370 3371 /* read in my part of the matrix column indices */ 3372 
nz = procsnz[0]; 3373 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3374 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3375 3376 /* read in every one elses and ship off */ 3377 for (i=1; i<size; i++) { 3378 nz = procsnz[i]; 3379 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3380 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3381 } 3382 ierr = PetscFree(cols);CHKERRQ(ierr); 3383 } else { 3384 /* determine buffer space needed for message */ 3385 nz = 0; 3386 for (i=0; i<m; i++) { 3387 nz += ourlens[i]; 3388 } 3389 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3390 3391 /* receive message of column indices*/ 3392 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3393 } 3394 3395 /* determine column ownership if matrix is not square */ 3396 if (N != M) { 3397 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3398 else n = newMat->cmap->n; 3399 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3400 cstart = cend - n; 3401 } else { 3402 cstart = rstart; 3403 cend = rend; 3404 n = cend - cstart; 3405 } 3406 3407 /* loop over local rows, determining number of off diagonal entries */ 3408 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3409 jj = 0; 3410 for (i=0; i<m; i++) { 3411 for (j=0; j<ourlens[i]; j++) { 3412 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3413 jj++; 3414 } 3415 } 3416 3417 for (i=0; i<m; i++) { 3418 ourlens[i] -= offlens[i]; 3419 } 3420 if (!sizesset) { 3421 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3422 } 3423 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3424 3425 for (i=0; i<m; i++) { 3426 ourlens[i] += offlens[i]; 3427 } 3428 3429 if (!rank) { 3430 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3431 3432 /* read in my part of the matrix numerical values */ 3433 nz = procsnz[0]; 3434 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3435 3436 /* insert into matrix */ 3437 jj = rstart; 3438 smycols = mycols; 3439 svals = vals; 3440 for (i=0; i<m; i++) { 3441 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3442 smycols += ourlens[i]; 3443 svals += ourlens[i]; 3444 jj++; 3445 } 3446 3447 /* read in other processors and ship out */ 3448 for (i=1; i<size; i++) { 3449 nz = procsnz[i]; 3450 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3451 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3452 } 3453 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3454 } else { 3455 /* receive numeric values */ 3456 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3457 3458 /* receive message of values*/ 3459 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3460 3461 /* insert into matrix */ 3462 jj = rstart; 3463 smycols = mycols; 3464 svals = vals; 3465 for (i=0; i<m; i++) { 3466 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3467 smycols += ourlens[i]; 3468 svals += ourlens[i]; 3469 jj++; 3470 } 3471 } 3472 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3473 ierr = PetscFree(vals);CHKERRQ(ierr); 3474 ierr = PetscFree(mycols);CHKERRQ(ierr); 3475 ierr = PetscFree(rowners);CHKERRQ(ierr); 3476 3477 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3478 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3479 PetscFunctionReturn(0); 3480 } 3481 
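/*
   A minimal usage sketch for the loader above (illustrative only, not part of this source;
   the file name "matrix.dat" is hypothetical). MatLoad() dispatches to MatLoad_MPIAIJ()
   when the matrix has type MATMPIAIJ and the viewer is a binary viewer:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/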
3482 #undef __FUNCT__ 3483 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3484 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3485 { 3486 PetscErrorCode ierr; 3487 IS iscol_local; 3488 PetscInt csize; 3489 3490 PetscFunctionBegin; 3491 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3492 if (call == MAT_REUSE_MATRIX) { 3493 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3494 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3495 } else { 3496 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3497 } 3498 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3499 if (call == MAT_INITIAL_MATRIX) { 3500 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3501 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3502 } 3503 PetscFunctionReturn(0); 3504 } 3505 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3508 /* 3509 Not great since it makes two copies of the submatrix, first an SeqAIJ 3510 in local and then by concatenating the local matrices the end result. 3511 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3512 3513 Note: This requires a sequential iscol with all indices. 3514 */ 3515 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3516 { 3517 PetscErrorCode ierr; 3518 PetscMPIInt rank,size; 3519 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3520 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3521 Mat *local,M,Mreuse; 3522 MatScalar *vwork,*aa; 3523 MPI_Comm comm = ((PetscObject)mat)->comm; 3524 Mat_SeqAIJ *aij; 3525 3526 3527 PetscFunctionBegin; 3528 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3530 3531 if (call == MAT_REUSE_MATRIX) { 3532 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3533 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3534 local = &Mreuse; 3535 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3536 } else { 3537 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3538 Mreuse = *local; 3539 ierr = PetscFree(local);CHKERRQ(ierr); 3540 } 3541 3542 /* 3543 m - number of local rows 3544 n - number of columns (same on all processors) 3545 rstart - first row in new global matrix generated 3546 */ 3547 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3548 if (call == MAT_INITIAL_MATRIX) { 3549 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3550 ii = aij->i; 3551 jj = aij->j; 3552 3553 /* 3554 Determine the number of non-zeros in the diagonal and off-diagonal 3555 portions of the matrix in order to do correct preallocation 3556 */ 3557 3558 /* first get start and end of "diagonal" columns */ 3559 if (csize == PETSC_DECIDE) { 3560 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3561 if (mglobal == n) { /* square matrix */ 3562 nlocal = m; 3563 } else { 3564 nlocal = n/size + ((n % size) > rank); 3565 } 3566 } else { 3567 nlocal = csize; 3568 } 3569 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3570 rstart = rend - nlocal; 3571 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to 
total number of columns %D",rend,n); 3572 3573 /* next, compute all the lengths */ 3574 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3575 olens = dlens + m; 3576 for (i=0; i<m; i++) { 3577 jend = ii[i+1] - ii[i]; 3578 olen = 0; 3579 dlen = 0; 3580 for (j=0; j<jend; j++) { 3581 if (*jj < rstart || *jj >= rend) olen++; 3582 else dlen++; 3583 jj++; 3584 } 3585 olens[i] = olen; 3586 dlens[i] = dlen; 3587 } 3588 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3589 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3590 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3591 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3592 ierr = PetscFree(dlens);CHKERRQ(ierr); 3593 } else { 3594 PetscInt ml,nl; 3595 3596 M = *newmat; 3597 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3598 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3599 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3600 /* 3601 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3602 rather than the slower MatSetValues(). 3603 */ 3604 M->was_assembled = PETSC_TRUE; 3605 M->assembled = PETSC_FALSE; 3606 } 3607 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3608 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3609 ii = aij->i; 3610 jj = aij->j; 3611 aa = aij->a; 3612 for (i=0; i<m; i++) { 3613 row = rstart + i; 3614 nz = ii[i+1] - ii[i]; 3615 cwork = jj; jj += nz; 3616 vwork = aa; aa += nz; 3617 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3618 } 3619 3620 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3621 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3622 *newmat = M; 3623 3624 /* save submatrix used in processor for next request */ 3625 if (call == MAT_INITIAL_MATRIX) { 3626 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3627 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3628 } 3629 3630 PetscFunctionReturn(0); 3631 } 3632 3633 EXTERN_C_BEGIN 3634 #undef __FUNCT__ 3635 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3636 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3637 { 3638 PetscInt m,cstart, cend,j,nnz,i,d; 3639 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3640 const PetscInt *JJ; 3641 PetscScalar *values; 3642 PetscErrorCode ierr; 3643 3644 PetscFunctionBegin; 3645 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3646 3647 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3648 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3649 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3650 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3651 m = B->rmap->n; 3652 cstart = B->cmap->rstart; 3653 cend = B->cmap->rend; 3654 rstart = B->rmap->rstart; 3655 3656 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3657 3658 #if defined(PETSC_USE_DEBUG) 3659 for (i=0; i<m; i++) { 3660 nnz = Ii[i+1]- Ii[i]; 3661 JJ = J + Ii[i]; 3662 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3663 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3664 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3665 } 3666 
#endif 3667 3668 for (i=0; i<m; i++) { 3669 nnz = Ii[i+1]- Ii[i]; 3670 JJ = J + Ii[i]; 3671 nnz_max = PetscMax(nnz_max,nnz); 3672 d = 0; 3673 for (j=0; j<nnz; j++) { 3674 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3675 } 3676 d_nnz[i] = d; 3677 o_nnz[i] = nnz - d; 3678 } 3679 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3680 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3681 3682 if (v) values = (PetscScalar*)v; 3683 else { 3684 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3685 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3686 } 3687 3688 for (i=0; i<m; i++) { 3689 ii = i + rstart; 3690 nnz = Ii[i+1]- Ii[i]; 3691 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3692 } 3693 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3694 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3695 3696 if (!v) { 3697 ierr = PetscFree(values);CHKERRQ(ierr); 3698 } 3699 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3700 PetscFunctionReturn(0); 3701 } 3702 EXTERN_C_END 3703 3704 #undef __FUNCT__ 3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3706 /*@ 3707 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3708 (the default parallel PETSc format). 3709 3710 Collective on MPI_Comm 3711 3712 Input Parameters: 3713 + B - the matrix 3714 . i - the indices into j for the start of each local row (starts with zero) 3715 . j - the column indices for each local row (starts with zero) 3716 - v - optional values in the matrix 3717 3718 Level: developer 3719 3720 Notes: 3721 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3722 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3723 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3724 3725 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3726 3727 The format which is used for the sparse matrix input, is equivalent to a 3728 row-major ordering.. i.e for the following matrix, the input data expected is 3729 as shown: 3730 3731 1 0 0 3732 2 0 3 P0 3733 ------- 3734 4 5 6 P1 3735 3736 Process0 [P0]: rows_owned=[0,1] 3737 i = {0,1,3} [size = nrow+1 = 2+1] 3738 j = {0,0,2} [size = nz = 6] 3739 v = {1,2,3} [size = nz = 6] 3740 3741 Process1 [P1]: rows_owned=[2] 3742 i = {0,3} [size = nrow+1 = 1+1] 3743 j = {0,1,2} [size = nz = 6] 3744 v = {4,5,6} [size = nz = 6] 3745 3746 .keywords: matrix, aij, compressed row, sparse, parallel 3747 3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3749 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3750 @*/ 3751 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3752 { 3753 PetscErrorCode ierr; 3754 3755 PetscFunctionBegin; 3756 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3757 PetscFunctionReturn(0); 3758 } 3759 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3762 /*@C 3763 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3764 (the default parallel PETSc format). 
For good matrix assembly performance 3765 the user should preallocate the matrix storage by setting the parameters 3766 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3767 performance can be increased by more than a factor of 50. 3768 3769 Collective on MPI_Comm 3770 3771 Input Parameters: 3772 + A - the matrix 3773 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3774 (same value is used for all local rows) 3775 . d_nnz - array containing the number of nonzeros in the various rows of the 3776 DIAGONAL portion of the local submatrix (possibly different for each row) 3777 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3778 The size of this array is equal to the number of local rows, i.e. 'm'. 3779 For matrices that will be factored, you must leave room for (and set) 3780 the diagonal entry even if it is zero. 3781 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3782 submatrix (same value is used for all local rows). 3783 - o_nnz - array containing the number of nonzeros in the various rows of the 3784 OFF-DIAGONAL portion of the local submatrix (possibly different for 3785 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3786 structure. The size of this array is equal to the number 3787 of local rows, i.e. 'm'. 3788 3789 If the *_nnz parameter is given then the *_nz parameter is ignored. 3790 3791 The AIJ format (also called the Yale sparse matrix format or 3792 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3793 storage. The stored row and column indices begin with zero. 3794 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3795 3796 The parallel matrix is partitioned such that the first m0 rows belong to 3797 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3798 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3799 3800 The DIAGONAL portion of the local submatrix of a processor can be defined 3801 as the submatrix obtained by extracting the part corresponding to 3802 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3803 first row that belongs to the processor, r2 is the last row belonging to 3804 this processor, and c1-c2 is the range of indices of the local part of a 3805 vector suitable for applying the matrix to. This is an m x n matrix. In the 3806 common case of a square matrix, the row and column ranges are the same and 3807 the DIAGONAL part is also square. The remaining portion of the local 3808 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3809 3810 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3811 3812 You can call MatGetInfo() to get information on how effective the preallocation was; 3813 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3814 You can also run with the option -info and look for messages with the string 3815 malloc in them to see if additional memory allocation was needed. 3816 3817 Example usage: 3818 3819 Consider the following 8x8 matrix with 34 non-zero values, that is 3820 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 3821 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3822 as follows: 3823 3824 .vb 3825 1 2 0 | 0 3 0 | 0 4 3826 Proc0 0 5 6 | 7 0 0 | 8 0 3827 9 0 10 | 11 0 0 | 12 0 3828 ------------------------------------- 3829 13 0 14 | 15 16 17 | 0 0 3830 Proc1 0 18 0 | 19 20 21 | 0 0 3831 0 0 0 | 22 23 0 | 24 0 3832 ------------------------------------- 3833 Proc2 25 26 27 | 0 0 28 | 29 0 3834 30 0 0 | 31 32 33 | 0 34 3835 .ve 3836 3837 This can be represented as a collection of submatrices as: 3838 3839 .vb 3840 A B C 3841 D E F 3842 G H I 3843 .ve 3844 3845 Here the submatrices A,B,C are owned by proc0, D,E,F are 3846 owned by proc1, and G,H,I are owned by proc2. 3847 3848 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3849 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3850 The 'M','N' parameters are 8,8, and have the same values on all procs. 3851 3852 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3853 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3854 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3855 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3856 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3857 matrix, and [DF] as another SeqAIJ matrix. 3858 3859 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3860 allocated for every row of the local DIAGONAL submatrix, and o_nz 3861 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3862 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 3863 rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 3864 In this case, the values of d_nz,o_nz are: 3865 .vb 3866 proc0 : d_nz = 2, o_nz = 2 3867 proc1 : d_nz = 3, o_nz = 2 3868 proc2 : d_nz = 1, o_nz = 4 3869 .ve 3870 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3871 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 3872 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3873 34 values. 3874 3875 When the d_nnz, o_nnz parameters are specified, the storage is specified 3876 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3877 In the above case the values for d_nnz,o_nnz are: 3878 .vb 3879 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3880 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3881 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3882 .ve 3883 Here the space allocated is the sum of all the above values, i.e. 34, and 3884 hence the preallocation is perfect. 3885 3886 Level: intermediate 3887 3888 .keywords: matrix, aij, compressed row, sparse, parallel 3889 3890 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3891 MPIAIJ, MatGetInfo() 3892 @*/ 3893 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3894 { 3895 PetscErrorCode ierr; 3896 3897 PetscFunctionBegin; 3898 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3899 PetscValidType(B,1); 3900 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3901 PetscFunctionReturn(0); 3902 } 3903 3904 #undef __FUNCT__ 3905 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3906 /*@ 3907 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3908 CSR format. 
3909 3910 Collective on MPI_Comm 3911 3912 Input Parameters: 3913 + comm - MPI communicator 3914 . m - number of local rows (Cannot be PETSC_DECIDE) 3915 . n - This value should be the same as the local size used in creating the 3916 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3917 calculated if N is given) For square matrices n is almost always m. 3918 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3919 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3920 . i - row indices 3921 . j - column indices 3922 - a - matrix values 3923 3924 Output Parameter: 3925 . mat - the matrix 3926 3927 Level: intermediate 3928 3929 Notes: 3930 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3931 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3932 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3933 3934 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3935 3936 The format which is used for the sparse matrix input, is equivalent to a 3937 row-major ordering.. i.e for the following matrix, the input data expected is 3938 as shown: 3939 3940 1 0 0 3941 2 0 3 P0 3942 ------- 3943 4 5 6 P1 3944 3945 Process0 [P0]: rows_owned=[0,1] 3946 i = {0,1,3} [size = nrow+1 = 2+1] 3947 j = {0,0,2} [size = nz = 6] 3948 v = {1,2,3} [size = nz = 6] 3949 3950 Process1 [P1]: rows_owned=[2] 3951 i = {0,3} [size = nrow+1 = 1+1] 3952 j = {0,1,2} [size = nz = 6] 3953 v = {4,5,6} [size = nz = 6] 3954 3955 .keywords: matrix, aij, compressed row, sparse, parallel 3956 3957 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3958 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3959 @*/ 3960 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3961 { 3962 PetscErrorCode ierr; 3963 3964 PetscFunctionBegin; 3965 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3966 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3967 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3968 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3969 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3970 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3971 PetscFunctionReturn(0); 3972 } 3973 3974 #undef __FUNCT__ 3975 #define __FUNCT__ "MatCreateAIJ" 3976 /*@C 3977 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3978 (the default parallel PETSc format). For good matrix assembly performance 3979 the user should preallocate the matrix storage by setting the parameters 3980 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3981 performance can be increased by more than a factor of 50. 3982 3983 Collective on MPI_Comm 3984 3985 Input Parameters: 3986 + comm - MPI communicator 3987 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3988 This value should be the same as the local size used in creating the 3989 y vector for the matrix-vector product y = Ax. 3990 . n - This value should be the same as the local size used in creating the 3991 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 3992 calculated if N is given). For square matrices n is almost always m. 3993 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3994 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3995 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3996 (same value is used for all local rows) 3997 . d_nnz - array containing the number of nonzeros in the various rows of the 3998 DIAGONAL portion of the local submatrix (possibly different for each row) 3999 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4000 The size of this array is equal to the number of local rows, i.e. 'm'. 4001 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4002 submatrix (same value is used for all local rows). 4003 - o_nnz - array containing the number of nonzeros in the various rows of the 4004 OFF-DIAGONAL portion of the local submatrix (possibly different for 4005 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4006 structure. The size of this array is equal to the number 4007 of local rows, i.e. 'm'. 4008 4009 Output Parameter: 4010 . A - the matrix 4011 4012 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4013 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4014 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4015 4016 Notes: 4017 If the *_nnz parameter is given then the *_nz parameter is ignored. 4018 4019 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4020 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4021 storage requirements for this matrix. 4022 4023 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4024 processor then it must be used on all processors that share the object for 4025 that argument. 4026 4027 The user MUST specify either the local or global matrix dimensions 4028 (possibly both). 4029 4030 The parallel matrix is partitioned across processors such that the 4031 first m0 rows belong to process 0, the next m1 rows belong to 4032 process 1, the next m2 rows belong to process 2, etc., where 4033 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4034 values corresponding to an [m x N] submatrix. 4035 4036 The columns are logically partitioned with the n0 columns belonging 4037 to the 0th partition, the next n1 columns belonging to the next 4038 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4039 4040 The DIAGONAL portion of the local submatrix on any given processor 4041 is the submatrix corresponding to the rows and columns m,n 4042 corresponding to the given processor, i.e. the diagonal matrix on 4043 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4044 etc. The remaining portion of the local submatrix [m x (N-n)] 4045 constitutes the OFF-DIAGONAL portion. The example below better 4046 illustrates this concept. 4047 4048 For a square global matrix we define each processor's diagonal portion 4049 to be its local rows and the corresponding columns (a square submatrix); 4050 each processor's off-diagonal portion encompasses the remainder of the 4051 local matrix (a rectangular submatrix). 4052 4053 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4054 4055 When calling this routine with a single process communicator, a matrix of 4056 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4057 type of communicator, use the construction mechanism: 4058 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4059 4060 By default, this format uses inodes (identical nodes) when possible. 4061 We search for consecutive rows with the same nonzero structure, thereby 4062 reusing matrix information to achieve increased efficiency. 4063 4064 Options Database Keys: 4065 + -mat_no_inode - Do not use inodes 4066 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4067 - -mat_aij_oneindex - Internally use indexing starting at 1 4068 rather than 0. Note that when calling MatSetValues(), 4069 the user still MUST index entries starting at 0! 4070 4071 4072 Example usage: 4073 4074 Consider the following 8x8 matrix with 34 non-zero values, that is 4075 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4076 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4077 as follows: 4078 4079 .vb 4080 1 2 0 | 0 3 0 | 0 4 4081 Proc0 0 5 6 | 7 0 0 | 8 0 4082 9 0 10 | 11 0 0 | 12 0 4083 ------------------------------------- 4084 13 0 14 | 15 16 17 | 0 0 4085 Proc1 0 18 0 | 19 20 21 | 0 0 4086 0 0 0 | 22 23 0 | 24 0 4087 ------------------------------------- 4088 Proc2 25 26 27 | 0 0 28 | 29 0 4089 30 0 0 | 31 32 33 | 0 34 4090 .ve 4091 4092 This can be represented as a collection of submatrices as: 4093 4094 .vb 4095 A B C 4096 D E F 4097 G H I 4098 .ve 4099 4100 Here the submatrices A,B,C are owned by proc0, D,E,F are 4101 owned by proc1, and G,H,I are owned by proc2. 4102 4103 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4104 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4105 The 'M','N' parameters are 8,8, and have the same values on all procs. 4106 4107 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4108 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4109 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4110 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4111 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4112 matrix, and [DF] as another SeqAIJ matrix. 4113 4114 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4115 allocated for every row of the local DIAGONAL submatrix, and o_nz 4116 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4117 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4118 rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 4119 In this case, the values of d_nz,o_nz are: 4120 .vb 4121 proc0 : d_nz = 2, o_nz = 2 4122 proc1 : d_nz = 3, o_nz = 2 4123 proc2 : d_nz = 1, o_nz = 4 4124 .ve 4125 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4126 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4127 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4128 34 values. 4129 4130 When the d_nnz, o_nnz parameters are specified, the storage is specified 4131 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4132 In the above case the values for d_nnz,o_nnz are: 4133 .vb 4134 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4135 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4136 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4137 .ve 4138 Here the space allocated is the sum of all the above values, i.e. 34, and 4139 hence the preallocation is perfect.
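   A minimal calling sketch (illustrative only; the constant preallocation counts 2 and 2 below are
   placeholders for whatever d_nz/o_nz the user determines as described above, and M, N stand for the
   desired global sizes):
.vb
     Mat A;
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     /* ... insert entries with MatSetValues(), then assemble ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve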
4140 4141 Level: intermediate 4142 4143 .keywords: matrix, aij, compressed row, sparse, parallel 4144 4145 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4146 MPIAIJ, MatCreateMPIAIJWithArrays() 4147 @*/ 4148 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4149 { 4150 PetscErrorCode ierr; 4151 PetscMPIInt size; 4152 4153 PetscFunctionBegin; 4154 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4155 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4156 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4157 if (size > 1) { 4158 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4159 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4160 } else { 4161 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4162 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4163 } 4164 PetscFunctionReturn(0); 4165 } 4166 4167 #undef __FUNCT__ 4168 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4169 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4170 { 4171 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4172 4173 PetscFunctionBegin; 4174 *Ad = a->A; 4175 *Ao = a->B; 4176 *colmap = a->garray; 4177 PetscFunctionReturn(0); 4178 } 4179 4180 #undef __FUNCT__ 4181 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4182 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4183 { 4184 PetscErrorCode ierr; 4185 PetscInt i; 4186 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4187 4188 PetscFunctionBegin; 4189 if (coloring->ctype == IS_COLORING_GLOBAL) { 4190 ISColoringValue *allcolors,*colors; 4191 ISColoring ocoloring; 4192 4193 /* set coloring for diagonal portion */ 4194 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4195 4196 /* set coloring for off-diagonal portion */ 4197 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4198 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4199 for (i=0; i<a->B->cmap->n; i++) { 4200 colors[i] = allcolors[a->garray[i]]; 4201 } 4202 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4203 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4204 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4205 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4206 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4207 ISColoringValue *colors; 4208 PetscInt *larray; 4209 ISColoring ocoloring; 4210 4211 /* set coloring for diagonal portion */ 4212 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4213 for (i=0; i<a->A->cmap->n; i++) { 4214 larray[i] = i + A->cmap->rstart; 4215 } 4216 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4217 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4218 for (i=0; i<a->A->cmap->n; i++) { 4219 colors[i] = coloring->colors[larray[i]]; 4220 } 4221 ierr = PetscFree(larray);CHKERRQ(ierr); 4222 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4223 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4224 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4225 4226 /* set coloring for off-diagonal portion */ 4227 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4228 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4229 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4230 for (i=0; i<a->B->cmap->n; i++) { 4231 colors[i] = coloring->colors[larray[i]]; 4232 } 4233 ierr = PetscFree(larray);CHKERRQ(ierr); 4234 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4235 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4236 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4237 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4238 4239 PetscFunctionReturn(0); 4240 } 4241 4242 #if defined(PETSC_HAVE_ADIC) 4243 #undef __FUNCT__ 4244 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4245 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4246 { 4247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4248 PetscErrorCode ierr; 4249 4250 PetscFunctionBegin; 4251 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4252 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4253 PetscFunctionReturn(0); 4254 } 4255 #endif 4256 4257 #undef __FUNCT__ 4258 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4259 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4260 { 4261 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4262 PetscErrorCode ierr; 4263 4264 PetscFunctionBegin; 4265 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4266 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4267 PetscFunctionReturn(0); 4268 } 4269 4270 #undef __FUNCT__ 4271 #define __FUNCT__ "MatMergeSymbolic" 4272 PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4273 { 4274 PetscErrorCode ierr; 4275 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum; 4276 PetscInt *indx; 4277 4278 PetscFunctionBegin; 4279 /* This routine will ONLY return MPIAIJ type matrix */ 4280 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4281 if (n == PETSC_DECIDE){ 4282 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4283 } 4284 /* Check sum(n) = N */ 4285 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPIU_SUM,comm);CHKERRQ(ierr); 4286 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4287 4288 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4289 rstart -= m; 4290 4291 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4292 for (i=0;i<m;i++) { 4293 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4294 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4295 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4296 } 4297 4298 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4299 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4300 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4301 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4302 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4303 PetscFunctionReturn(0); 4304 } 4305 4306 #undef __FUNCT__ 4307 #define __FUNCT__ "MatMergeNumeric" 4308 PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4309 { 4310 PetscErrorCode ierr; 4311 PetscInt m,N,i,rstart,nnz,Ii; 4312 PetscInt *indx; 4313 PetscScalar *values; 4314 4315 PetscFunctionBegin; 4316 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4317 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4318 for 
(i=0;i<m;i++) { 4319 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4320 Ii = i + rstart; 4321 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4322 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4323 } 4324 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4325 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4326 PetscFunctionReturn(0); 4327 } 4328 4329 #undef __FUNCT__ 4330 #define __FUNCT__ "MatMerge" 4331 /*@ 4332 MatMerge - Creates a single large PETSc matrix by concatenating sequential 4333 matrices from each processor 4334 4335 Collective on MPI_Comm 4336 4337 Input Parameters: 4338 + comm - the communicators the parallel matrix will live on 4339 . inmat - the input sequential matrices 4340 . n - number of local columns (or PETSC_DECIDE) 4341 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4342 4343 Output Parameter: 4344 . outmat - the parallel matrix generated 4345 4346 Level: advanced 4347 4348 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4349 4350 @*/ 4351 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4352 { 4353 PetscErrorCode ierr; 4354 4355 PetscFunctionBegin; 4356 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4357 if (scall == MAT_INITIAL_MATRIX){ 4358 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4359 } 4360 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4361 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4362 PetscFunctionReturn(0); 4363 } 4364 4365 #undef __FUNCT__ 4366 #define __FUNCT__ "MatFileSplit" 4367 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4368 { 4369 PetscErrorCode ierr; 4370 PetscMPIInt rank; 4371 PetscInt m,N,i,rstart,nnz; 4372 size_t len; 4373 const PetscInt *indx; 4374 PetscViewer out; 4375 char *name; 4376 Mat B; 4377 const PetscScalar *values; 4378 4379 PetscFunctionBegin; 4380 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4381 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4382 /* Should this be the type of the diagonal block of A? 
*/ 4383 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4384 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4385 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4386 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4387 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4388 for (i=0;i<m;i++) { 4389 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4390 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4391 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4392 } 4393 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4394 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4395 4396 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4397 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4398 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4399 sprintf(name,"%s.%d",outfile,rank); 4400 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4401 ierr = PetscFree(name); 4402 ierr = MatView(B,out);CHKERRQ(ierr); 4403 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4404 ierr = MatDestroy(&B);CHKERRQ(ierr); 4405 PetscFunctionReturn(0); 4406 } 4407 4408 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4409 #undef __FUNCT__ 4410 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4411 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4412 { 4413 PetscErrorCode ierr; 4414 Mat_Merge_SeqsToMPI *merge; 4415 PetscContainer container; 4416 4417 PetscFunctionBegin; 4418 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4419 if (container) { 4420 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4427 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4428 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4429 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4430 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4431 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4432 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4433 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4434 ierr = PetscFree(merge);CHKERRQ(ierr); 4435 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4436 } 4437 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4438 PetscFunctionReturn(0); 4439 } 4440 4441 #include <../src/mat/utils/freespace.h> 4442 #include <petscbt.h> 4443 4444 #undef __FUNCT__ 4445 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4446 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4447 { 4448 PetscErrorCode ierr; 4449 MPI_Comm comm=((PetscObject)mpimat)->comm; 4450 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4451 PetscMPIInt size,rank,taga,*len_s; 4452 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4453 PetscInt proc,m; 4454 PetscInt **buf_ri,**buf_rj; 4455 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4456 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4457 MPI_Request *s_waits,*r_waits; 4458 MPI_Status *status; 4459 MatScalar *aa=a->a; 4460 MatScalar **abuf_r,*ba_i; 4461 Mat_Merge_SeqsToMPI *merge; 4462 PetscContainer container; 4463 4464 PetscFunctionBegin; 4465 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4466 4467 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4468 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4469 4470 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4471 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4472 4473 bi = merge->bi; 4474 bj = merge->bj; 4475 buf_ri = merge->buf_ri; 4476 buf_rj = merge->buf_rj; 4477 4478 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4479 owners = merge->rowmap->range; 4480 len_s = merge->len_s; 4481 4482 /* send and recv matrix values */ 4483 /*-----------------------------*/ 4484 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4485 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4486 4487 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4488 for (proc=0,k=0; proc<size; proc++){ 4489 if (!len_s[proc]) continue; 4490 i = owners[proc]; 4491 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4492 k++; 4493 } 4494 4495 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4496 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4497 ierr = PetscFree(status);CHKERRQ(ierr); 4498 4499 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4500 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4501 4502 /* insert mat values of mpimat */ 4503 /*----------------------------*/ 4504 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4505 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4506 4507 for (k=0; k<merge->nrecv; k++){ 4508 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4509 nrows = *(buf_ri_k[k]); 4510 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4511 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4512 } 4513 4514 /* set values of ba */ 4515 m = merge->rowmap->n; 4516 for (i=0; i<m; i++) { 4517 arow = owners[rank] + i; 4518 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4519 bnzi = bi[i+1] - bi[i]; 4520 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4521 4522 /* add local non-zero vals of this proc's seqmat into ba */ 4523 anzi = ai[arow+1] - ai[arow]; 4524 aj = a->j + ai[arow]; 4525 aa = a->a + ai[arow]; 4526 nextaj = 0; 4527 for (j=0; nextaj<anzi; j++){ 4528 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4529 ba_i[j] += aa[nextaj++]; 4530 } 4531 } 4532 4533 /* add received vals into ba */ 4534 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4535 /* i-th row */ 4536 if (i == *nextrow[k]) { 4537 anzi = *(nextai[k]+1) - *nextai[k]; 4538 aj = buf_rj[k] + *(nextai[k]); 4539 aa = abuf_r[k] + *(nextai[k]); 4540 nextaj = 0; 4541 for (j=0; nextaj<anzi; j++){ 4542 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4543 ba_i[j] += aa[nextaj++]; 4544 } 4545 } 4546 nextrow[k]++; nextai[k]++; 4547 } 4548 } 4549 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4550 } 4551 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4552 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4553 4554 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4555 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4556 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4557 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 
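/* at this point each locally owned row of mpimat holds the sum of this process's seqmat entries and the matching entries received from the other processes */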
4558 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4559 PetscFunctionReturn(0); 4560 } 4561 4562 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4563 4564 #undef __FUNCT__ 4565 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4566 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4567 { 4568 PetscErrorCode ierr; 4569 Mat B_mpi; 4570 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4571 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4572 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4573 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4574 PetscInt len,proc,*dnz,*onz; 4575 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4576 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4577 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4578 MPI_Status *status; 4579 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4580 PetscBT lnkbt; 4581 Mat_Merge_SeqsToMPI *merge; 4582 PetscContainer container; 4583 4584 PetscFunctionBegin; 4585 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4586 4587 /* make sure it is a PETSc comm */ 4588 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4590 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4591 4592 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4593 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4594 4595 /* determine row ownership */ 4596 /*---------------------------------------------------------*/ 4597 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4598 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4599 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4600 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4601 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4602 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4603 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4604 4605 m = merge->rowmap->n; 4606 M = merge->rowmap->N; 4607 owners = merge->rowmap->range; 4608 4609 /* determine the number of messages to send, their lengths */ 4610 /*---------------------------------------------------------*/ 4611 len_s = merge->len_s; 4612 4613 len = 0; /* length of buf_si[] */ 4614 merge->nsend = 0; 4615 for (proc=0; proc<size; proc++){ 4616 len_si[proc] = 0; 4617 if (proc == rank){ 4618 len_s[proc] = 0; 4619 } else { 4620 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4621 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4622 } 4623 if (len_s[proc]) { 4624 merge->nsend++; 4625 nrows = 0; 4626 for (i=owners[proc]; i<owners[proc+1]; i++){ 4627 if (ai[i+1] > ai[i]) nrows++; 4628 } 4629 len_si[proc] = 2*(nrows+1); 4630 len += len_si[proc]; 4631 } 4632 } 4633 4634 /* determine the number and length of messages to receive for ij-structure */ 4635 /*-------------------------------------------------------------------------*/ 4636 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4637 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4638 4639 /* post the Irecv of j-structure */ 4640 /*-------------------------------*/ 4641 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4642 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4643 4644 /* post the Isend of j-structure */ 4645 /*--------------------------------*/ 4646 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4647 4648 for (proc=0, k=0; proc<size; proc++){ 4649 if (!len_s[proc]) continue; 4650 i = owners[proc]; 4651 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4652 k++; 4653 } 4654 4655 /* receives and sends of j-structure are complete */ 4656 /*------------------------------------------------*/ 4657 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4658 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4659 4660 /* send and recv i-structure */ 4661 /*---------------------------*/ 4662 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4663 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4664 4665 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4666 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4667 for (proc=0,k=0; proc<size; proc++){ 4668 if (!len_s[proc]) continue; 4669 /* form outgoing message for i-structure: 4670 buf_si[0]: nrows to be sent 4671 [1:nrows]: row index (global) 4672 [nrows+1:2*nrows+1]: i-structure index 4673 */ 4674 /*-------------------------------------------*/ 4675 nrows = len_si[proc]/2 - 1; 4676 buf_si_i = buf_si + nrows+1; 4677 buf_si[0] = nrows; 4678 buf_si_i[0] = 0; 4679 nrows = 0; 4680 for (i=owners[proc]; i<owners[proc+1]; i++){ 4681 anzi = ai[i+1] - ai[i]; 4682 if (anzi) { 4683 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4684 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4685 nrows++; 4686 } 4687 } 4688 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4689 k++; 4690 buf_si += len_si[proc]; 4691 } 4692 4693 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4694 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4695 4696 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4697 for (i=0; i<merge->nrecv; i++){ 4698 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4699 } 4700 4701 ierr = PetscFree(len_si);CHKERRQ(ierr); 4702 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4703 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4704 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4705 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4706 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4707 ierr = PetscFree(status);CHKERRQ(ierr); 4708 4709 /* compute a local seq matrix in each processor */ 4710 /*----------------------------------------------*/ 4711 /* allocate bi array and free space for accumulating nonzero column info */ 4712 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4713 bi[0] = 0; 4714 4715 /* create and initialize a linked list */ 4716 nlnk = N+1; 4717 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4718 4719 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4720 len = 0; 4721 len = ai[owners[rank+1]] - ai[owners[rank]]; 4722 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4723 current_space = free_space; 4724 4725 /* determine symbolic info for each local row */ 4726 ierr = 
PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4727 4728 for (k=0; k<merge->nrecv; k++){ 4729 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4730 nrows = *buf_ri_k[k]; 4731 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4732 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 4733 } 4734 4735 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4736 len = 0; 4737 for (i=0;i<m;i++) { 4738 bnzi = 0; 4739 /* add local non-zero cols of this proc's seqmat into lnk */ 4740 arow = owners[rank] + i; 4741 anzi = ai[arow+1] - ai[arow]; 4742 aj = a->j + ai[arow]; 4743 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4744 bnzi += nlnk; 4745 /* add received col data into lnk */ 4746 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4747 if (i == *nextrow[k]) { /* i-th row */ 4748 anzi = *(nextai[k]+1) - *nextai[k]; 4749 aj = buf_rj[k] + *nextai[k]; 4750 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4751 bnzi += nlnk; 4752 nextrow[k]++; nextai[k]++; 4753 } 4754 } 4755 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4756 4757 /* if free space is not available, make more free space */ 4758 if (current_space->local_remaining<bnzi) { 4759 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4760 nspacedouble++; 4761 } 4762 /* copy data into free space, then initialize lnk */ 4763 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4764 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4765 4766 current_space->array += bnzi; 4767 current_space->local_used += bnzi; 4768 current_space->local_remaining -= bnzi; 4769 4770 bi[i+1] = bi[i] + bnzi; 4771 } 4772 4773 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4774 4775 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4776 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4777 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4778 4779 /* create symbolic parallel matrix B_mpi */ 4780 /*---------------------------------------*/ 4781 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4782 if (n==PETSC_DECIDE) { 4783 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4784 } else { 4785 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4786 } 4787 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4788 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4789 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4790 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4791 4792 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4793 B_mpi->assembled = PETSC_FALSE; 4794 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4795 merge->bi = bi; 4796 merge->bj = bj; 4797 merge->buf_ri = buf_ri; 4798 merge->buf_rj = buf_rj; 4799 merge->coi = PETSC_NULL; 4800 merge->coj = PETSC_NULL; 4801 merge->owners_co = PETSC_NULL; 4802 4803 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4804 4805 /* attach the supporting struct to B_mpi for reuse */ 4806 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4807 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4808 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4809 ierr =
PetscContainerDestroy(&container);CHKERRQ(ierr); 4810 *mpimat = B_mpi; 4811 4812 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4813 PetscFunctionReturn(0); 4814 } 4815 4816 #undef __FUNCT__ 4817 #define __FUNCT__ "MatMerge_SeqsToMPI" 4818 /*@C 4819 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential 4820 matrices from each processor 4821 4822 Collective on MPI_Comm 4823 4824 Input Parameters: 4825 + comm - the communicator the parallel matrix will live on 4826 . seqmat - the input sequential matrix 4827 . m - number of local rows (or PETSC_DECIDE) 4828 . n - number of local columns (or PETSC_DECIDE) 4829 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4830 4831 Output Parameter: 4832 . mpimat - the parallel matrix generated 4833 4834 Level: advanced 4835 4836 Notes: 4837 The dimensions of the sequential matrix in each processor MUST be the same. 4838 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4839 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4840 @*/ 4841 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4842 { 4843 PetscErrorCode ierr; 4844 PetscMPIInt size; 4845 4846 PetscFunctionBegin; 4847 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4848 if (size == 1){ 4849 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4850 if (scall == MAT_INITIAL_MATRIX){ 4851 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4852 } else { 4853 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4854 } 4855 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4856 PetscFunctionReturn(0); 4857 } 4858 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4859 if (scall == MAT_INITIAL_MATRIX){ 4860 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4861 } 4862 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4863 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4864 PetscFunctionReturn(0); 4865 } 4866 4867 #undef __FUNCT__ 4868 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4869 /*@ 4870 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4871 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4872 with MatGetSize() 4873 4874 Not Collective 4875 4876 Input Parameters: 4877 + A - the matrix 4878 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4879 4880 Output Parameter: 4881 .
A_loc - the local sequential matrix generated 4882 4883 Level: developer 4884 4885 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4886 4887 @*/ 4888 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4889 { 4890 PetscErrorCode ierr; 4891 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4892 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4893 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4894 MatScalar *aa=a->a,*ba=b->a,*cam; 4895 PetscScalar *ca; 4896 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4897 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4898 PetscBool match; 4899 4900 PetscFunctionBegin; 4901 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4902 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4903 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4904 if (scall == MAT_INITIAL_MATRIX){ 4905 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4906 ci[0] = 0; 4907 for (i=0; i<am; i++){ 4908 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4909 } 4910 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4911 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4912 k = 0; 4913 for (i=0; i<am; i++) { 4914 ncols_o = bi[i+1] - bi[i]; 4915 ncols_d = ai[i+1] - ai[i]; 4916 /* off-diagonal portion of A, columns before the diagonal block */ 4917 for (jo=0; jo<ncols_o; jo++) { 4918 col = cmap[*bj]; 4919 if (col >= cstart) break; 4920 cj[k] = col; bj++; 4921 ca[k++] = *ba++; 4922 } 4923 /* diagonal portion of A */ 4924 for (j=0; j<ncols_d; j++) { 4925 cj[k] = cstart + *aj++; 4926 ca[k++] = *aa++; 4927 } 4928 /* off-diagonal portion of A, columns after the diagonal block */ 4929 for (j=jo; j<ncols_o; j++) { 4930 cj[k] = cmap[*bj++]; 4931 ca[k++] = *ba++; 4932 } 4933 } 4934 /* put together the new matrix */ 4935 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4936 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4937 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4938 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4939 mat->free_a = PETSC_TRUE; 4940 mat->free_ij = PETSC_TRUE; 4941 mat->nonew = 0; 4942 } else if (scall == MAT_REUSE_MATRIX){ 4943 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4944 ci = mat->i; cj = mat->j; cam = mat->a; 4945 for (i=0; i<am; i++) { 4946 /* off-diagonal portion of A, columns before the diagonal block */ 4947 ncols_o = bi[i+1] - bi[i]; 4948 for (jo=0; jo<ncols_o; jo++) { 4949 col = cmap[*bj]; 4950 if (col >= cstart) break; 4951 *cam++ = *ba++; bj++; 4952 } 4953 /* diagonal portion of A */ 4954 ncols_d = ai[i+1] - ai[i]; 4955 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4956 /* off-diagonal portion of A, columns after the diagonal block */ 4957 for (j=jo; j<ncols_o; j++) { 4958 *cam++ = *ba++; bj++; 4959 } 4960 } 4961 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4962 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4963 PetscFunctionReturn(0); 4964 } 4965 4966 #undef __FUNCT__ 4967 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4968 /*@C 4969 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4970 4971 Not Collective 4972 4973 Input Parameters: 4974 + A - the matrix 4975 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4976 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4977 4978 Output Parameter: 4979 .
A_loc - the local sequential matrix generated 4980 4981 Level: developer 4982 4983 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4984 4985 @*/ 4986 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4987 { 4988 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4989 PetscErrorCode ierr; 4990 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4991 IS isrowa,iscola; 4992 Mat *aloc; 4993 PetscBool match; 4994 4995 PetscFunctionBegin; 4996 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4997 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4998 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4999 if (!row){ 5000 start = A->rmap->rstart; end = A->rmap->rend; 5001 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5002 } else { 5003 isrowa = *row; 5004 } 5005 if (!col){ 5006 start = A->cmap->rstart; 5007 cmap = a->garray; 5008 nzA = a->A->cmap->n; 5009 nzB = a->B->cmap->n; 5010 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5011 ncols = 0; 5012 for (i=0; i<nzB; i++) { 5013 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5014 else break; 5015 } 5016 imark = i; 5017 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5018 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5019 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5020 } else { 5021 iscola = *col; 5022 } 5023 if (scall != MAT_INITIAL_MATRIX){ 5024 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5025 aloc[0] = *A_loc; 5026 } 5027 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5028 *A_loc = aloc[0]; 5029 ierr = PetscFree(aloc);CHKERRQ(ierr); 5030 if (!row){ 5031 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5032 } 5033 if (!col){ 5034 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5035 } 5036 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5037 PetscFunctionReturn(0); 5038 } 5039 5040 #undef __FUNCT__ 5041 #define __FUNCT__ "MatGetBrowsOfAcols" 5042 /*@C 5043 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5044 5045 Collective on Mat 5046 5047 Input Parameters: 5048 + A,B - the matrices in mpiaij format 5049 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5050 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5051 5052 Output Parameter: 5053 + rowb, colb - index sets of rows and columns of B to extract 5054 - B_seq - the sequential matrix generated 5055 5056 Level: developer 5057 5058 @*/ 5059 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5060 { 5061 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5062 PetscErrorCode ierr; 5063 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5064 IS isrowb,iscolb; 5065 Mat *bseq=PETSC_NULL; 5066 5067 PetscFunctionBegin; 5068 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5069 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5070 } 5071 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5072 5073 if (scall == MAT_INITIAL_MATRIX){ 5074 start = A->cmap->rstart; 5075 cmap = a->garray; 5076 nzA = a->A->cmap->n; 5077 nzB = a->B->cmap->n; 5078 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5079 ncols = 0; 5080 for (i=0; i<nzB; i++) { /* row < local row index */ 5081 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5082 else break; 5083 } 5084 imark = i; 5085 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5086 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5087 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5088 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5089 } else { 5090 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5091 isrowb = *rowb; iscolb = *colb; 5092 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5093 bseq[0] = *B_seq; 5094 } 5095 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5096 *B_seq = bseq[0]; 5097 ierr = PetscFree(bseq);CHKERRQ(ierr); 5098 if (!rowb){ 5099 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5100 } else { 5101 *rowb = isrowb; 5102 } 5103 if (!colb){ 5104 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5105 } else { 5106 *colb = iscolb; 5107 } 5108 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5109 PetscFunctionReturn(0); 5110 } 5111 5112 #undef __FUNCT__ 5113 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5114 /* 5115 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5116 of the OFF-DIAGONAL portion of local A 5117 5118 Collective on Mat 5119 5120 Input Parameters: 5121 + A,B - the matrices in mpiaij format 5122 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5123 5124 Output Parameter: 5125 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5126 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5127 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5128 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5129 5130 Level: developer 5131 5132 */ 5133 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5134 { 5135 VecScatter_MPI_General *gen_to,*gen_from; 5136 PetscErrorCode ierr; 5137 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5138 Mat_SeqAIJ *b_oth; 5139 VecScatter ctx=a->Mvctx; 5140 MPI_Comm comm=((PetscObject)ctx)->comm; 5141 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5142 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5143 PetscScalar *rvalues,*svalues; 5144 MatScalar *b_otha,*bufa,*bufA; 5145 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5146 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5147 MPI_Status *sstatus,rstatus; 5148 PetscMPIInt jj; 5149 PetscInt *cols,sbs,rbs; 5150 PetscScalar *vals; 5151 5152 PetscFunctionBegin; 5153 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5154 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5155 } 5156 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5157 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5158 5159 gen_to = (VecScatter_MPI_General*)ctx->todata; 5160 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5161 rvalues = gen_from->values; /* holds the length of receiving row */ 5162 svalues = gen_to->values; /* holds the length of sending row */ 5163 nrecvs = gen_from->n; 5164 nsends = gen_to->n; 5165 5166 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5167 srow = gen_to->indices; /* local row index to be sent */ 5168 sstarts = gen_to->starts; 5169 sprocs = gen_to->procs; 5170 sstatus = gen_to->sstatus; 5171 sbs = gen_to->bs; 5172 rstarts = gen_from->starts; 5173 rprocs = gen_from->procs; 5174 rbs = gen_from->bs; 5175 5176 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5177 if (scall == MAT_INITIAL_MATRIX){ 5178 /* i-array */ 5179 /*---------*/ 5180 /* post receives */ 5181 for (i=0; i<nrecvs; i++){ 5182 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5183 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5184 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5185 } 5186 5187 /* pack the outgoing message */ 5188 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5189 sstartsj[0] = 0; rstartsj[0] = 0; 5190 len = 0; /* total length of j or a array to be sent */ 5191 k = 0; 5192 for (i=0; i<nsends; i++){ 5193 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5194 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5195 for (j=0; j<nrows; j++) { 5196 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5197 for (l=0; l<sbs; l++){ 5198 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5199 rowlen[j*sbs+l] = ncols; 5200 len += ncols; 5201 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5202 } 5203 k++; 5204 } 5205 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5206 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5207 } 5208 /* 
recvs and sends of i-array are completed */ 5209 i = nrecvs; 5210 while (i--) { 5211 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5212 } 5213 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5214 5215 /* allocate buffers for sending j and a arrays */ 5216 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5217 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5218 5219 /* create i-array of B_oth */ 5220 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5221 b_othi[0] = 0; 5222 len = 0; /* total length of j or a array to be received */ 5223 k = 0; 5224 for (i=0; i<nrecvs; i++){ 5225 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5226 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5227 for (j=0; j<nrows; j++) { 5228 b_othi[k+1] = b_othi[k] + rowlen[j]; 5229 len += rowlen[j]; k++; 5230 } 5231 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5232 } 5233 5234 /* allocate space for j and a arrays of B_oth */ 5235 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5236 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5237 5238 /* j-array */ 5239 /*---------*/ 5240 /* post receives of j-array */ 5241 for (i=0; i<nrecvs; i++){ 5242 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5243 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5244 } 5245 5246 /* pack the outgoing message j-array */ 5247 k = 0; 5248 for (i=0; i<nsends; i++){ 5249 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5250 bufJ = bufj+sstartsj[i]; 5251 for (j=0; j<nrows; j++) { 5252 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5253 for (ll=0; ll<sbs; ll++){ 5254 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5255 for (l=0; l<ncols; l++){ 5256 *bufJ++ = cols[l]; 5257 } 5258 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5259 } 5260 } 5261 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5262 } 5263 5264 /* recvs and sends of j-array are completed */ 5265 i = nrecvs; 5266 while (i--) { 5267 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5268 } 5269 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5270 } else if (scall == MAT_REUSE_MATRIX){ 5271 sstartsj = *startsj_s; 5272 rstartsj = *startsj_r; 5273 bufa = *bufa_ptr; 5274 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5275 b_otha = b_oth->a; 5276 } else { 5277 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5278 } 5279 5280 /* a-array */ 5281 /*---------*/ 5282 /* post receives of a-array */ 5283 for (i=0; i<nrecvs; i++){ 5284 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5285 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5286 } 5287 5288 /* pack the outgoing message a-array */ 5289 k = 0; 5290 for (i=0; i<nsends; i++){ 5291 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5292 bufA = bufa+sstartsj[i]; 5293 for (j=0; j<nrows; j++) { 5294 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5295 for (ll=0; ll<sbs; ll++){ 5296 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5297 for (l=0; l<ncols; l++){ 5298 *bufA++ = vals[l]; 5299 } 5300 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5301 } 5302 } 5303 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5304 } 5305 /* recvs and sends of a-array are completed */ 5306 i = nrecvs; 5307 while (i--) { 5308 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5309 } 5310 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5311 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5312 5313 if (scall == MAT_INITIAL_MATRIX){ 5314 /* put together the new matrix */ 5315 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5316 5317 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5318 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5319 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5320 b_oth->free_a = PETSC_TRUE; 5321 b_oth->free_ij = PETSC_TRUE; 5322 b_oth->nonew = 0; 5323 5324 ierr = PetscFree(bufj);CHKERRQ(ierr); 5325 if (!startsj_s || !bufa_ptr){ 5326 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5327 ierr = PetscFree(bufa);CHKERRQ(ierr); /* the caller did not ask for the send buffer back, so release it here */ 5328 } else { 5329 *startsj_s = sstartsj; 5330 *startsj_r = rstartsj; 5331 *bufa_ptr = bufa; 5332 } 5333 } 5334 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5335 PetscFunctionReturn(0); 5336 } 5337 5338 #undef __FUNCT__ 5339 #define __FUNCT__ "MatGetCommunicationStructs" 5340 /*@C 5341 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5342 5343 Not Collective 5344 5345 Input Parameter: 5346 . A - The matrix in mpiaij format 5347 5348 Output Parameters: 5349 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5350 .
colmap - A map from global column index to local index into lvec 5351 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5352 5353 Level: developer 5354 5355 @*/ 5356 #if defined (PETSC_USE_CTABLE) 5357 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5358 #else 5359 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5360 #endif 5361 { 5362 Mat_MPIAIJ *a; 5363 5364 PetscFunctionBegin; 5365 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5366 PetscValidPointer(lvec, 2); 5367 PetscValidPointer(colmap, 3); 5368 PetscValidPointer(multScatter, 4); 5369 a = (Mat_MPIAIJ *) A->data; 5370 if (lvec) *lvec = a->lvec; 5371 if (colmap) *colmap = a->colmap; 5372 if (multScatter) *multScatter = a->Mvctx; 5373 PetscFunctionReturn(0); 5374 } 5375 5376 EXTERN_C_BEGIN 5377 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5378 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5379 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5380 EXTERN_C_END 5381 5382 #undef __FUNCT__ 5383 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5384 /* 5385 Computes (B'*A')' since computing B*A directly is untenable 5386 5387 n p p 5388 ( ) ( ) ( ) 5389 m ( A ) * n ( B ) = m ( C ) 5390 ( ) ( ) ( ) 5391 5392 */ 5393 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5394 { 5395 PetscErrorCode ierr; 5396 Mat At,Bt,Ct; 5397 5398 PetscFunctionBegin; 5399 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5400 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5401 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5402 ierr = MatDestroy(&At);CHKERRQ(ierr); 5403 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5404 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5405 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5406 PetscFunctionReturn(0); 5407 } 5408 5409 #undef __FUNCT__ 5410 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5411 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5412 { 5413 PetscErrorCode ierr; 5414 PetscInt m=A->rmap->n,n=B->cmap->n; 5415 Mat Cmat; 5416 5417 PetscFunctionBegin; 5418 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5419 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5420 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5421 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5422 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5423 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5424 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5425 *C = Cmat; 5426 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5427 PetscFunctionReturn(0); 5428 } 5429 5430 /* ----------------------------------------------------------------*/ 5431 #undef __FUNCT__ 5432 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5433 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5434 { 5435 PetscErrorCode ierr; 5436 5437 PetscFunctionBegin; 5438 if (scall == MAT_INITIAL_MATRIX){ 5439 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5440 } 5441 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5442 PetscFunctionReturn(0); 5443 } 5444 5445 EXTERN_C_BEGIN 5446 #if 
defined(PETSC_HAVE_MUMPS) 5447 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5448 #endif 5449 #if defined(PETSC_HAVE_PASTIX) 5450 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5451 #endif 5452 #if defined(PETSC_HAVE_SUPERLU_DIST) 5453 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5454 #endif 5455 #if defined(PETSC_HAVE_SPOOLES) 5456 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5457 #endif 5458 EXTERN_C_END 5459 5460 /*MC 5461 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5462 5463 Options Database Keys: 5464 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5465 5466 Level: beginner 5467 5468 .seealso: MatCreateAIJ() 5469 M*/ 5470 5471 EXTERN_C_BEGIN 5472 #undef __FUNCT__ 5473 #define __FUNCT__ "MatCreate_MPIAIJ" 5474 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5475 { 5476 Mat_MPIAIJ *b; 5477 PetscErrorCode ierr; 5478 PetscMPIInt size; 5479 5480 PetscFunctionBegin; 5481 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5482 5483 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5484 B->data = (void*)b; 5485 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5486 B->rmap->bs = 1; 5487 B->assembled = PETSC_FALSE; 5488 5489 B->insertmode = NOT_SET_VALUES; 5490 b->size = size; 5491 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5492 5493 /* build cache for off array entries formed */ 5494 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5495 b->donotstash = PETSC_FALSE; 5496 b->colmap = 0; 5497 b->garray = 0; 5498 b->roworiented = PETSC_TRUE; 5499 5500 /* stuff used for matrix vector multiply */ 5501 b->lvec = PETSC_NULL; 5502 b->Mvctx = PETSC_NULL; 5503 5504 /* stuff for MatGetRow() */ 5505 b->rowindices = 0; 5506 b->rowvalues = 0; 5507 b->getrowactive = PETSC_FALSE; 5508 5509 #if defined(PETSC_HAVE_SPOOLES) 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5511 "MatGetFactor_mpiaij_spooles", 5512 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5513 #endif 5514 #if defined(PETSC_HAVE_MUMPS) 5515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5516 "MatGetFactor_aij_mumps", 5517 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5518 #endif 5519 #if defined(PETSC_HAVE_PASTIX) 5520 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5521 "MatGetFactor_mpiaij_pastix", 5522 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5523 #endif 5524 #if defined(PETSC_HAVE_SUPERLU_DIST) 5525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5526 "MatGetFactor_mpiaij_superlu_dist", 5527 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5528 #endif 5529 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5530 "MatStoreValues_MPIAIJ", 5531 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5532 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5533 "MatRetrieveValues_MPIAIJ", 5534 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5535 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5536 "MatGetDiagonalBlock_MPIAIJ", 5537 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5538 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5539 "MatIsTranspose_MPIAIJ", 5540 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5541 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5542 "MatMPIAIJSetPreallocation_MPIAIJ", 5543 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5544 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5545 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5546 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5547 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5548 "MatDiagonalScaleLocal_MPIAIJ", 5549 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5550 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5551 "MatConvert_MPIAIJ_MPIAIJPERM", 5552 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5553 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5554 "MatConvert_MPIAIJ_MPIAIJCRL", 5555 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5557 "MatConvert_MPIAIJ_MPISBAIJ", 5558 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5559 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5560 "MatMatMult_MPIDense_MPIAIJ", 5561 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5563 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5564 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5565 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5566 "MatMatMultNumeric_MPIDense_MPIAIJ", 5567 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5568 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5569 PetscFunctionReturn(0); 5570 } 5571 EXTERN_C_END 5572 5573 #undef __FUNCT__ 5574 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5575 /*@ 5576 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5577 and "off-diagonal" part of the matrix in CSR format. 5578 5579 Collective on MPI_Comm 5580 5581 Input Parameters: 5582 + comm - MPI communicator 5583 . m - number of local rows (Cannot be PETSC_DECIDE) 5584 . n - This value should be the same as the local size used in creating the 5585 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5586 calculated if N is given) For square matrices n is almost always m. 5587 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5588 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5589 . i - row indices for "diagonal" portion of matrix 5590 . j - column indices 5591 . a - matrix values 5592 . oi - row indices for "off-diagonal" portion of matrix 5593 . oj - column indices 5594 - oa - matrix values 5595 5596 Output Parameter: 5597 . mat - the matrix 5598 5599 Level: advanced 5600 5601 Notes: 5602 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5603 must free the arrays once the matrix has been destroyed and not before. 5604 5605 The i and j indices are 0 based 5606 5607 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5608 5609 This sets local rows and cannot be used to set off-processor values. 5610 5611 You cannot later use MatSetValues() to change values in this matrix. 
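
   Example (a minimal illustrative sketch, not drawn from an existing PETSc example; it assumes exactly two MPI
   processes, each owning one row and one column of a 2x2 matrix, and - as implied by the MatCreateSeqAIJWithArrays()
   calls in the implementation - that j holds column indices local to the diagonal block while oj holds global
   column indices):

.vb
      PetscErrorCode ierr;
      PetscMPIInt    rank;
      PetscInt       i[2]  = {0,1}, j[1]  = {0};   illustrative data: diagonal block has one entry in local column 0
      PetscInt       oi[2] = {0,1}, oj[1];         off-diagonal block has one entry; its global column is set below
      PetscScalar    a[1]  = {2.0}, oa[1] = {-1.0};
      Mat            C;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = rank ? 0 : 1;                        the column owned by the other process
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&C);CHKERRQ(ierr);
.ve

   which, on two processes, assembles the 2x2 matrix with 2 on the diagonal and -1 off the diagonal.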
5612 5613 .keywords: matrix, aij, compressed row, sparse, parallel 5614 5615 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5616 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5617 @*/ 5618 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5619 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5620 { 5621 PetscErrorCode ierr; 5622 Mat_MPIAIJ *maij; 5623 5624 PetscFunctionBegin; 5625 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5626 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5627 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5628 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5629 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5630 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5631 maij = (Mat_MPIAIJ*) (*mat)->data; 5632 maij->donotstash = PETSC_TRUE; 5633 (*mat)->preallocated = PETSC_TRUE; 5634 5635 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5636 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5637 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5638 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5639 5640 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5641 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5642 5643 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5644 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5645 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5646 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5647 5648 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5649 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5650 PetscFunctionReturn(0); 5651 } 5652 5653 /* 5654 Special version for direct calls from Fortran 5655 */ 5656 #include <petsc-private/fortranimpl.h> 5657 5658 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5659 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5660 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5661 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5662 #endif 5663 5664 /* Change these macros so can be used in void function */ 5665 #undef CHKERRQ 5666 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5667 #undef SETERRQ2 5668 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5669 #undef SETERRQ3 5670 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5671 #undef SETERRQ 5672 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5673 5674 EXTERN_C_BEGIN 5675 #undef __FUNCT__ 5676 #define __FUNCT__ "matsetvaluesmpiaij_" 5677 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5678 { 5679 Mat mat = *mmat; 5680 PetscInt m = *mm, n = *mn; 5681 InsertMode addv = *maddv; 5682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5683 PetscScalar value; 5684 PetscErrorCode ierr; 5685 5686 MatCheckPreallocated(mat,1); 5687 if (mat->insertmode == NOT_SET_VALUES) { 5688 mat->insertmode = addv; 5689 } 5690 #if defined(PETSC_USE_DEBUG) 5691 else if (mat->insertmode != addv) { 5692 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert 
values"); 5693 } 5694 #endif 5695 { 5696 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5697 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5698 PetscBool roworiented = aij->roworiented; 5699 5700 /* Some Variables required in the macro */ 5701 Mat A = aij->A; 5702 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5703 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5704 MatScalar *aa = a->a; 5705 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5706 Mat B = aij->B; 5707 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5708 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5709 MatScalar *ba = b->a; 5710 5711 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5712 PetscInt nonew = a->nonew; 5713 MatScalar *ap1,*ap2; 5714 5715 PetscFunctionBegin; 5716 for (i=0; i<m; i++) { 5717 if (im[i] < 0) continue; 5718 #if defined(PETSC_USE_DEBUG) 5719 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5720 #endif 5721 if (im[i] >= rstart && im[i] < rend) { 5722 row = im[i] - rstart; 5723 lastcol1 = -1; 5724 rp1 = aj + ai[row]; 5725 ap1 = aa + ai[row]; 5726 rmax1 = aimax[row]; 5727 nrow1 = ailen[row]; 5728 low1 = 0; 5729 high1 = nrow1; 5730 lastcol2 = -1; 5731 rp2 = bj + bi[row]; 5732 ap2 = ba + bi[row]; 5733 rmax2 = bimax[row]; 5734 nrow2 = bilen[row]; 5735 low2 = 0; 5736 high2 = nrow2; 5737 5738 for (j=0; j<n; j++) { 5739 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5740 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5741 if (in[j] >= cstart && in[j] < cend){ 5742 col = in[j] - cstart; 5743 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5744 } else if (in[j] < 0) continue; 5745 #if defined(PETSC_USE_DEBUG) 5746 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5747 #endif 5748 else { 5749 if (mat->was_assembled) { 5750 if (!aij->colmap) { 5751 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5752 } 5753 #if defined (PETSC_USE_CTABLE) 5754 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5755 col--; 5756 #else 5757 col = aij->colmap[in[j]] - 1; 5758 #endif 5759 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5760 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5761 col = in[j]; 5762 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5763 B = aij->B; 5764 b = (Mat_SeqAIJ*)B->data; 5765 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5766 rp2 = bj + bi[row]; 5767 ap2 = ba + bi[row]; 5768 rmax2 = bimax[row]; 5769 nrow2 = bilen[row]; 5770 low2 = 0; 5771 high2 = nrow2; 5772 bm = aij->B->rmap->n; 5773 ba = b->a; 5774 } 5775 } else col = in[j]; 5776 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5777 } 5778 } 5779 } else { 5780 if (!aij->donotstash) { 5781 if (roworiented) { 5782 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5783 } else { 5784 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5785 } 5786 } 5787 } 5788 }} 5789 PetscFunctionReturnVoid(); 5790 } 5791 EXTERN_C_END 5792 5793