1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 
PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. 
Should be a routine for each matrix type.

  Only for square matrices

  Process 0 owns the full sequential matrix gmat and ships row lengths, column
  indices and values to the other ranks; m is this process's local row count.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds gmat, so only it can check the type */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* build the row-ownership prefix sum: rowners[r] = first global row of rank r */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] = entries strictly left of the diagonal block in row i (used by MAT_REUSE_MATRIX below) */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-block lengths */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash ld for reuse in the MAT_REUSE_MATRIX path */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr  = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       the received row is laid out [left-of-diag | diag block | right-of-diag] and ld[]
       gives the left-of-diag count per row */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diag tail of row i-1 plus left-of-diag head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diag tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* scalable: hash table mapping global column + 1 -> local off-diagonal column + 1
     (the +1 offsets keep 0 available as the "not present" sentinel) */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* not scalable: dense array of length N; entry 0 means the column is not present locally */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts (or adds) one value into the diagonal (A) block of the parallel matrix.
   NOTE: a macro, not a function.  It reads/writes the locals rp1, ap1, nrow1,
   low1, high1, lastcol1, rmax1, t, _i, ii, N, nonew, ignorezeroentries declared
   by the caller (MatSetValues_MPIAIJ) and uses the label a_noinsert.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal (B) block;
   uses the caller's rp2/ap2/nrow2/... locals and the label b_noinsert.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Copies one full row of values (v, ordered by ascending global column) directly
   into the matrix's storage, splitting it into the left-of-diagonal part of B,
   the diagonal block A, and the right-of-diagonal part of B.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr =
PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds an m-by-n logically dense block of values (global indices im[], in[]).
   Locally owned rows go straight into the A (diagonal) or B (off-diagonal) block via
   the MatSetValues_SeqAIJ_{A,B}_Private macros; off-process rows are stashed for
   communication during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;    /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state the insertion macros expect */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column after assembly: fall back to the
                 un-assembled representation so it can be inserted */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values; only locally owned rows are supported.
   Off-diagonal columns are located through the colmap; columns with no stored
   entry return 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored in the off-diagonal block -> implicit zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Begins assembly: verifies all ranks agree on INSERT vs ADD mode and starts
   scattering the stashed off-process entries.  Collective.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash into local storage, assembles the A and B
   blocks, coordinates any needed disassembly across ranks, and sets up the
   off-diagonal multiply structures on first final assembly.  Collective.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode
MatZeroEntries_MPIAIJ(Mat A) 697 { 698 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 703 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatZeroRows_MPIAIJ" 709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 710 { 711 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 714 PetscInt i,*owners = A->rmap->range; 715 PetscInt *nprocs,j,idx,nsends,row; 716 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 717 PetscInt *rvalues,count,base,slen,*source; 718 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 719 MPI_Comm comm = ((PetscObject)A)->comm; 720 MPI_Request *send_waits,*recv_waits; 721 MPI_Status recv_status,*send_status; 722 const PetscScalar *xx; 723 PetscScalar *bb; 724 #if defined(PETSC_DEBUG) 725 PetscBool found = PETSC_FALSE; 726 #endif 727 728 PetscFunctionBegin; 729 /* first count number of contributors to each processor */ 730 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 731 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 732 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 733 j = 0; 734 for (i=0; i<N; i++) { 735 if (lastidx > (idx = rows[i])) j = 0; 736 lastidx = idx; 737 for (; j<size; j++) { 738 if (idx >= owners[j] && idx < owners[j+1]) { 739 nprocs[2*j]++; 740 nprocs[2*j+1] = 1; 741 owner[i] = j; 742 #if defined(PETSC_DEBUG) 743 found = PETSC_TRUE; 744 #endif 745 break; 746 } 747 } 748 #if defined(PETSC_DEBUG) 749 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 750 found = PETSC_FALSE; 751 #endif 752 } 753 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 754 755 if (A->nooffproczerorows) { 756 if (nsends 
> 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 757 nrecvs = nsends; 758 nmax = N; 759 } else { 760 /* inform other processors of number of messages and max length*/ 761 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 762 } 763 764 /* post receives: */ 765 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 766 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 767 for (i=0; i<nrecvs; i++) { 768 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 769 } 770 771 /* do sends: 772 1) starts[i] gives the starting index in svalues for stuff going to 773 the ith processor 774 */ 775 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 776 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 777 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 778 starts[0] = 0; 779 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 780 for (i=0; i<N; i++) { 781 svalues[starts[owner[i]]++] = rows[i]; 782 } 783 784 starts[0] = 0; 785 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 786 count = 0; 787 for (i=0; i<size; i++) { 788 if (nprocs[2*i+1]) { 789 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 790 } 791 } 792 ierr = PetscFree(starts);CHKERRQ(ierr); 793 794 base = owners[rank]; 795 796 /* wait on receives */ 797 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 798 count = nrecvs; slen = 0; 799 while (count) { 800 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 801 /* unpack receives into our local space */ 802 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 803 source[imdex] = recv_status.MPI_SOURCE; 804 lens[imdex] = n; 805 slen += n; 806 count--; 807 } 
808 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 809 810 /* move the data into the send scatter */ 811 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 812 count = 0; 813 for (i=0; i<nrecvs; i++) { 814 values = rvalues + i*nmax; 815 for (j=0; j<lens[i]; j++) { 816 lrows[count++] = values[j] - base; 817 } 818 } 819 ierr = PetscFree(rvalues);CHKERRQ(ierr); 820 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 821 ierr = PetscFree(owner);CHKERRQ(ierr); 822 ierr = PetscFree(nprocs);CHKERRQ(ierr); 823 824 /* fix right hand side if needed */ 825 if (x && b) { 826 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 827 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 828 for (i=0; i<slen; i++) { 829 bb[lrows[i]] = diag*xx[lrows[i]]; 830 } 831 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 833 } 834 /* 835 Zero the required rows. If the "diagonal block" of the matrix 836 is square and the user wishes to set the diagonal we use separate 837 code so that MatSetValues() is not called for each diagonal allocating 838 new memory, thus calling lots of mallocs and slowing things down. 
*/
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: the sequential MatZeroRows() can place diag itself */
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: zero first, then insert diag explicitly;
       the insertions may create new nonzero locations, hence the check below */
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row = lrows[i] + rstart;  /* convert local row back to a global index */
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends posted earlier so svalues may be safely freed */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   Zeros the given global rows AND the matching columns of the parallel matrix,
   putting diag on the diagonal of the zeroed rows (delegated to the sequential
   MatZeroRowsColumns() on the diagonal block).  Row indices are first routed to
   their owning processes with a nonblocking Isend/Irecv rendezvous; the
   off-diagonal block l->B is then cleaned up locally using a scattered 0/1 mask
   of zeroed columns, and b is adjusted by the removed column contributions
   (bb -= a_ij * x_j for each zeroed column j).
   NOTE(review): x and b are dereferenced unconditionally below (VecScatter of x,
   VecGetArray(b)) — assumes callers always pass both vectors; confirm.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor:
     nprocs[2*j]   = number of rows going to process j
     nprocs[2*j+1] = 1 iff anything goes to process j at all */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* the owner search resumes from the previous hit; restart only when the
       input indices go backwards (handles sorted input in one pass) */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: nmax is an upper bound on any single incoming message */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] (the packing loop above advanced the entries) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives; message lengths are discovered via MPI_Get_count() */
  ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter; convert to local row numbers */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix: build a 0/1 mask over the ghosted
     columns marking which global columns were zeroed somewhere */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  /* bring the ghost values of x over so b can be corrected below */
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column *aj was zeroed: move its contribution into the rhs */
          bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues may be safely freed */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   yy = A*xx.  The scatter of off-process entries of xx into a->lvec is
   overlapped with the diagonal-block multiply: Begin, local mult, End,
   then the off-diagonal multadd.  Do not reorder these calls.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* Apply only the local (diagonal) block: xx = diag(A)*bb, delegated to a->A */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* zz = yy + A*xx, with the same communication/computation overlap as MatMult */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr =
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T * xx.  The off-diagonal transpose product is computed first and
   scattered back in reverse; the ordering of Begin/local-part/End depends on
   whether the scatter's begin and end are merged (VecScatterGetMerged).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat equals Amat^T (within tol).  First the cheap local test
   on the diagonal blocks; if that passes on more than one process, the
   off-diagonal columns are gathered via MatGetSubMatrices() and compared.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  /* NOTE(review): notme is sized with N but filled using M (second loop runs
     to M); the two agree only for square matrices — confirm that is assumed */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* zz = yy + A^T*xx, overlapping the reverse scatter with the local transpose product */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* A = aa*A: scale both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* Frees all storage owned by the MPIAIJ implementation and unregisters
   the composed function pointers installed at creation time */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr =
PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   Writes the parallel matrix to a binary viewer.  Rank 0 writes the header,
   then gathers row lengths, column indices, and values from each process in
   turn (flow-controlled to bound memory) and streams them to the file.  For
   each local row, entries are emitted in global column order: B entries with
   global column <= cstart, then all A entries (shifted by cstart), then the
   remaining B entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    /* header[3] = total nonzeros, reduced from all processes */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs a buffer as large as the largest nz on any processor */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* B entries left of the diagonal block come first (global column order) */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): check is on rnz but the message prints nz — confirm intent */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values (same B-left / A / B-right ordering) */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   Dispatches viewing for ASCII, draw, and socket viewers.  Info formats are
   handled per process; otherwise the whole matrix is gathered onto process 0
   (guarded by a 1024-row limit for ASCII) and viewed there.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part: column indices are temporarily shifted to global
       numbering in place, then shifted back after the MatSetValues() calls */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part: translate local columns through garray instead */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;  /* keep the base pointer; cols is advanced in the loop below */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* Top-level MatView for MPIAIJ: accepts ASCII, draw, socket, and binary viewers */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr =
MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   MatSOR_MPIAIJ - SOR/Eisenstat relaxation for a parallel AIJ matrix.

   Only the processor-local variants (SOR_LOCAL_*_SWEEP, SOR_EISENSTAT,
   SOR_APPLY_UPPER) are supported; a true parallel SOR is rejected below.
   Each outer iteration scatters the current solution xx into mat->lvec,
   forms bb1 = bb - B*x (B is the off-diagonal block), then runs the
   sequential SOR kernel of the diagonal block mat->A on bb1.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;    /* work rhs; created only when actually needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  /* bb1 is needed when more than one outer iteration runs, when the initial
     guess is nonzero (for a single flag bit, ~flag & bit == !(flag & bit)),
     or for the Eisenstat variant */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    /* pure local triangular application: delegate to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x == 0 the off-processor contribution vanishes: sweep directly on bb */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* cache the diagonal on first use; reused by later calls on this matrix */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - Permutes rows and columns of a parallel AIJ matrix by
   building a collective (gathered, inverted) row permutation, a local inverted
   column permutation, and extracting the corresponding submatrix.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* NOTE(review): growp does not appear to be destroyed anywhere in this
     function - looks like an IS leak; verify against the full file */
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr =
ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(&icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - Gathers matrix statistics (nonzeros used/allocated,
   memory, mallocs) by summing the local diagonal (A) and off-diagonal (B)
   blocks, then combining across processes per the requested MatInfoType:
   MAT_LOCAL returns this process's totals, MAT_GLOBAL_MAX/MAT_GLOBAL_SUM
   reduce with max/sum over the matrix communicator.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];   /* packed stats for a single reduction */

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  /* accumulate B's stats on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies a MatOption to a parallel AIJ matrix,
   forwarding to the sequential diagonal/off-diagonal blocks where that
   makes sense and handling the parallel-only options locally.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* structural options apply to both local blocks */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case
MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      /* SPD implies (structural) symmetry; record all the derived flags */
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix as
   a single (column, value) list in increasing global column order.

   The row lives split across the diagonal block A (local columns, offset by
   cstart) and the off-diagonal block B (columns mapped through garray).
   Since both parts are already sorted, the merge below copies the B entries
   with global column < cstart, then all A entries, then the remaining B
   entries.  Results point into per-matrix scratch arrays (rowvalues/
   rowindices), so only one row may be "active" at a time.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries whose global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ();
   only clears the "active" flag since the data lives in reusable scratch.
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or
   infinity- (max row sum) norm of a parallel AIJ matrix by combining the
   local A and B blocks and reducing over the matrix communicator.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over both blocks, then global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* per-global-column absolute sums, reduced across processes;
         note this allocates O(global columns) on every process */
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal block columns are compressed: map through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are not split across processes, so a local row-sum max followed
         by a global max is exact */
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Builds the transpose of a parallel AIJ matrix by
   inserting each local row of A and B as a column of the result with
   MatSetValues() and assembling.  For MAT_INITIAL_MATRIX (or in-place reuse)
   a new matrix is created with a preallocation estimate.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* row i of A becomes column 'row' of B: note swapped row/col arguments */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}  /* compressed -> global columns */
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: steal B's guts into A */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__
"MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).
   The scatter of rr into lvec is started before the left scaling so that
   communication overlaps with local computation.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, so ll applies directly) */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
/*
   MatSetBlockSize_MPIAIJ - Sets the block size on both local blocks and on
   the row/column layouts of the parallel matrix.
*/
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/*
   MatSetUnfactored_MPIAIJ - Clears the factored state; only the diagonal
   block carries factorization state here.
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Compares two parallel AIJ matrices block-by-block and
   combines the per-process results with a logical-AND reduction so all
   ranks agree on the answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    /* only compare off-diagonal blocks when the diagonal blocks matched */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* NOTE(review): flg/flag are PetscBool but reduced as MPI_INT - this relies
     on PetscBool having the size of an int; verify on all supported builds */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B.  The fast block-wise path is only taken
   for SAME_NONZERO_PATTERN between matrices of the same implementation;
   otherwise the generic value-by-value MatCopy_Basic() is used.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts.
If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/*
   MatSetUpPreallocation_MPIAIJ - Default preallocation when the user gave
   none: delegates to MatMPIAIJSetPreallocation() with PETSC_DEFAULT sizes.
*/
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   Three paths, in decreasing order of speed:
   - SAME_NONZERO_PATTERN: a raw BLAS axpy over both blocks' value arrays;
   - SUBSET_NONZERO_PATTERN: diagonal blocks via MatAXPY_SeqAIJ; off-diagonal
     blocks via a cached x->y index map (xtoy) because their compressed
     column spaces differ;
   - otherwise: build a new matrix with merged preallocation and replace Y.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a cached map that was built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);  /* NOTE(review): return value not CHKERRQ-checked */
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/*
   MatConjugate_MPIAIJ - Complex-conjugates all entries; no-op in real builds.
*/
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
/*
   MatRealPart_MPIAIJ - Replaces all entries by their real part, in both
   local blocks.
*/
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
/*
   MatImaginaryPart_MPIAIJ - Replaces all entries by their imaginary part,
   in both local blocks.
*/
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL
/* The code below is C++ (Parallel Boost Graph Library) and is only compiled
   when PETSc is configured with PBGL support. */

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
/* NOTE(review): "distributed_property_map->hpp" looks like a mangled
   "distributed_property_map.hpp" - verify against the Boost headers */
#include <boost/parallel/distributed_property_map->hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* only ILU(0) with natural ordering is supported by the PBGL backend */
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  /* heap-allocated: ownership passes to the PetscContainer composed below */
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* NOTE(review): the four calls below assign ierr but are not CHKERRQ-checked */
  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
  ierr = PetscContainerSetPointer(c, lgraph_p);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
  ierr = PetscContainerDestroy(&c);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   MatLUFactorNumeric_MPIAIJ - Intentionally empty: the PBGL path does all
   numeric work in the symbolic phase above.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type*   lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* retrieve the factorization graph stashed by MatILUFactorSymbolic_MPIAIJ() */
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  /* NOTE(review): the arrays obtained below are never restored with
     VecRestoreArray(), and sx/sb are unused - verify intent */
  PetscScalar* array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar* array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  /* wrap the raw PETSc arrays as Boost property maps indexed by vertex */
  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
                 ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
            boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
               vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);

  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;        /* local nonzeros; message counts */
  PetscMPIInt    *send_rank,*recv_rank;        /* peer ranks for sends/receives */
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;  /* nonzero counts and column buffers */
  PetscScalar    *sbuf_a,**rbuf_a;             /* value buffers */
  PetscErrorCode (*Destroy)(Mat);              /* original destroy routine, restored on teardown */
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
/*
   PetscContainerDestroy_MatRedundant - Frees all buffers owned by a
   Mat_Redundant context and the context itself; attached to the container
   that carries it.
*/
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr =
PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
/*
   MatDestroy_MatRedundant - Destroy hook installed on matrices produced by
   MatGetRedundantMatrix_MPIAIJ(): restores the original destroy routine
   saved in the Mat_Redundant context, detaches the container (which frees
   the context via PetscContainerDestroy_MatRedundant), then chains to the
   original destroy.
*/
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  /* NOTE(review): "exit" in the message below is presumably a typo for "exist" */
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
  ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  A->ops->destroy = redund->Destroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  if (A->ops->destroy) {
    ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
/*
   MatGetRedundantMatrix_MPIAIJ - Builds, on each of nsubcomm subcommunicators,
   a redundant copy of the whole parallel matrix.  For MAT_REUSE_MATRIX the
   communication buffers cached in the matrix's Mat_Redundant container are
   reused after consistency checks.  (Body continues beyond this chunk.)
*/
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request
*s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2491 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2492 MPI_Status recv_status,*send_status; 2493 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2494 PetscInt **rbuf_j=PETSC_NULL; 2495 PetscScalar **rbuf_a=PETSC_NULL; 2496 Mat_Redundant *redund=PETSC_NULL; 2497 PetscContainer container; 2498 2499 PetscFunctionBegin; 2500 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2501 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2502 2503 if (reuse == MAT_REUSE_MATRIX) { 2504 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2505 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2506 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2507 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2508 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2509 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2510 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2511 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2512 2513 nsends = redund->nsends; 2514 nrecvs = redund->nrecvs; 2515 send_rank = redund->send_rank; 2516 recv_rank = redund->recv_rank; 2517 sbuf_nz = redund->sbuf_nz; 2518 rbuf_nz = redund->rbuf_nz; 2519 sbuf_j = redund->sbuf_j; 2520 sbuf_a = redund->sbuf_a; 2521 rbuf_j = redund->rbuf_j; 2522 rbuf_a = redund->rbuf_a; 2523 } 2524 2525 if (reuse == MAT_INITIAL_MATRIX){ 2526 PetscMPIInt subrank,subsize; 2527 PetscInt nleftover,np_subcomm; 2528 /* get the destination processors' id send_rank, nsends and nrecvs */ 2529 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2530 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2531 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2532 np_subcomm = size/nsubcomm; 2533 nleftover = size - nsubcomm*np_subcomm; 2534 nsends = 0; nrecvs = 0; 2535 for (i=0; i<size; i++){ /* i=rank*/ 2536 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2537 send_rank[nsends] = i; nsends++; 2538 recv_rank[nrecvs++] = i; 2539 } 2540 } 2541 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2542 i = size-nleftover-1; 2543 j = 0; 2544 while (j < nsubcomm - nleftover){ 2545 send_rank[nsends++] = i; 2546 i--; j++; 2547 } 2548 } 2549 2550 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2551 for (i=0; i<nleftover; i++){ 2552 recv_rank[nrecvs++] = size-nleftover+i; 2553 } 2554 } 2555 2556 /* allocate sbuf_j, sbuf_a */ 2557 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2558 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2559 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2560 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2561 2562 /* copy mat's local entries into the buffers */ 2563 if (reuse == MAT_INITIAL_MATRIX){ 2564 rownz_max = 0; 2565 rptr = sbuf_j; 2566 cols = sbuf_j + rend-rstart + 1; 2567 vals = sbuf_a; 2568 rptr[0] = 0; 2569 for (i=0; 
i<rend-rstart; i++){ 2570 row = i + rstart; 2571 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2572 ncols = nzA + nzB; 2573 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2574 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2575 /* load the column indices for this row into cols */ 2576 lwrite = 0; 2577 for (l=0; l<nzB; l++) { 2578 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2579 vals[lwrite] = aworkB[l]; 2580 cols[lwrite++] = ctmp; 2581 } 2582 } 2583 for (l=0; l<nzA; l++){ 2584 vals[lwrite] = aworkA[l]; 2585 cols[lwrite++] = cstart + cworkA[l]; 2586 } 2587 for (l=0; l<nzB; l++) { 2588 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2589 vals[lwrite] = aworkB[l]; 2590 cols[lwrite++] = ctmp; 2591 } 2592 } 2593 vals += ncols; 2594 cols += ncols; 2595 rptr[i+1] = rptr[i] + ncols; 2596 if (rownz_max < ncols) rownz_max = ncols; 2597 } 2598 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2599 } else { /* only copy matrix values into sbuf_a */ 2600 rptr = sbuf_j; 2601 vals = sbuf_a; 2602 rptr[0] = 0; 2603 for (i=0; i<rend-rstart; i++){ 2604 row = i + rstart; 2605 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2606 ncols = nzA + nzB; 2607 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2608 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2609 lwrite = 0; 2610 for (l=0; l<nzB; l++) { 2611 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2612 } 2613 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2616 } 2617 vals += ncols; 2618 rptr[i+1] = rptr[i] + ncols; 2619 } 2620 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2621 2622 /* send nzlocal to others, and recv other's nzlocal */ 2623 /*--------------------------------------------------*/ 2624 if (reuse == MAT_INITIAL_MATRIX){ 2625 ierr = PetscMalloc2(3*(nsends + 
nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2626 s_waits2 = s_waits3 + nsends; 2627 s_waits1 = s_waits2 + nsends; 2628 r_waits1 = s_waits1 + nsends; 2629 r_waits2 = r_waits1 + nrecvs; 2630 r_waits3 = r_waits2 + nrecvs; 2631 } else { 2632 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2633 r_waits3 = s_waits3 + nsends; 2634 } 2635 2636 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2637 if (reuse == MAT_INITIAL_MATRIX){ 2638 /* get new tags to keep the communication clean */ 2639 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2641 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2642 2643 /* post receives of other's nzlocal */ 2644 for (i=0; i<nrecvs; i++){ 2645 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2646 } 2647 /* send nzlocal to others */ 2648 for (i=0; i<nsends; i++){ 2649 sbuf_nz[i] = nzlocal; 2650 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2651 } 2652 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2653 count = nrecvs; 2654 while (count) { 2655 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2656 recv_rank[imdex] = recv_status.MPI_SOURCE; 2657 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2658 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2659 2660 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2661 rbuf_nz[imdex] += i + 2; 2662 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2663 ierr = 
MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2664 count--; 2665 } 2666 /* wait on sends of nzlocal */ 2667 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2668 /* send mat->i,j to others, and recv from other's */ 2669 /*------------------------------------------------*/ 2670 for (i=0; i<nsends; i++){ 2671 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2672 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2673 } 2674 /* wait on receives of mat->i,j */ 2675 /*------------------------------*/ 2676 count = nrecvs; 2677 while (count) { 2678 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2679 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2680 count--; 2681 } 2682 /* wait on sends of mat->i,j */ 2683 /*---------------------------*/ 2684 if (nsends) { 2685 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2686 } 2687 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2688 2689 /* post receives, send and receive mat->a */ 2690 /*----------------------------------------*/ 2691 for (imdex=0; imdex<nrecvs; imdex++) { 2692 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2693 } 2694 for (i=0; i<nsends; i++){ 2695 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2696 } 2697 count = nrecvs; 2698 while (count) { 2699 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2700 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2701 count--; 2702 } 2703 if (nsends) { 2704 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2705 } 2706 2707 ierr = 
PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2708 2709 /* create redundant matrix */ 2710 /*-------------------------*/ 2711 if (reuse == MAT_INITIAL_MATRIX){ 2712 /* compute rownz_max for preallocation */ 2713 for (imdex=0; imdex<nrecvs; imdex++){ 2714 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2715 rptr = rbuf_j[imdex]; 2716 for (i=0; i<j; i++){ 2717 ncols = rptr[i+1] - rptr[i]; 2718 if (rownz_max < ncols) rownz_max = ncols; 2719 } 2720 } 2721 2722 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2723 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2724 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2725 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2726 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2727 } else { 2728 C = *matredundant; 2729 } 2730 2731 /* insert local matrix entries */ 2732 rptr = sbuf_j; 2733 cols = sbuf_j + rend-rstart + 1; 2734 vals = sbuf_a; 2735 for (i=0; i<rend-rstart; i++){ 2736 row = i + rstart; 2737 ncols = rptr[i+1] - rptr[i]; 2738 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2739 vals += ncols; 2740 cols += ncols; 2741 } 2742 /* insert received matrix entries */ 2743 for (imdex=0; imdex<nrecvs; imdex++){ 2744 rstart = rowrange[recv_rank[imdex]]; 2745 rend = rowrange[recv_rank[imdex]+1]; 2746 rptr = rbuf_j[imdex]; 2747 cols = rbuf_j[imdex] + rend-rstart + 1; 2748 vals = rbuf_a[imdex]; 2749 for (i=0; i<rend-rstart; i++){ 2750 row = i + rstart; 2751 ncols = rptr[i+1] - rptr[i]; 2752 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2753 vals += ncols; 2754 cols += ncols; 2755 } 2756 } 2757 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2760 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant 
mat size %d != input mat size %d",M,mat->rmap->N); 2761 if (reuse == MAT_INITIAL_MATRIX) { 2762 PetscContainer container; 2763 *matredundant = C; 2764 /* create a supporting struct and attach it to C for reuse */ 2765 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2766 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2767 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2768 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2769 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2770 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2771 2772 redund->nzlocal = nzlocal; 2773 redund->nsends = nsends; 2774 redund->nrecvs = nrecvs; 2775 redund->send_rank = send_rank; 2776 redund->recv_rank = recv_rank; 2777 redund->sbuf_nz = sbuf_nz; 2778 redund->rbuf_nz = rbuf_nz; 2779 redund->sbuf_j = sbuf_j; 2780 redund->sbuf_a = sbuf_a; 2781 redund->rbuf_j = rbuf_j; 2782 redund->rbuf_a = rbuf_a; 2783 2784 redund->Destroy = C->ops->destroy; 2785 C->ops->destroy = MatDestroy_MatRedundant; 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2793 { 2794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2795 PetscErrorCode ierr; 2796 PetscInt i,*idxb = 0; 2797 PetscScalar *va,*vb; 2798 Vec vtmp; 2799 2800 PetscFunctionBegin; 2801 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2802 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2803 if (idx) { 2804 for (i=0; i<A->rmap->n; i++) { 2805 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2806 } 2807 } 2808 2809 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2810 if (idx) { 2811 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2812 } 2813 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2814 ierr = 
VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2815 2816 for (i=0; i<A->rmap->n; i++){ 2817 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2818 va[i] = vb[i]; 2819 if (idx) idx[i] = a->garray[idxb[i]]; 2820 } 2821 } 2822 2823 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2824 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2825 ierr = PetscFree(idxb);CHKERRQ(ierr); 2826 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2833 { 2834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2835 PetscErrorCode ierr; 2836 PetscInt i,*idxb = 0; 2837 PetscScalar *va,*vb; 2838 Vec vtmp; 2839 2840 PetscFunctionBegin; 2841 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2842 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2843 if (idx) { 2844 for (i=0; i<A->cmap->n; i++) { 2845 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2846 } 2847 } 2848 2849 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2850 if (idx) { 2851 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2852 } 2853 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2854 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2855 2856 for (i=0; i<A->rmap->n; i++){ 2857 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2858 va[i] = vb[i]; 2859 if (idx) idx[i] = a->garray[idxb[i]]; 2860 } 2861 } 2862 2863 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2864 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2865 ierr = PetscFree(idxb);CHKERRQ(ierr); 2866 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2867 PetscFunctionReturn(0); 2868 } 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2873 { 2874 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2875 PetscInt n = A->rmap->n; 2876 PetscInt cstart = A->cmap->rstart; 2877 PetscInt *cmap = mat->garray; 2878 PetscInt 
*diagIdx, *offdiagIdx; 2879 Vec diagV, offdiagV; 2880 PetscScalar *a, *diagA, *offdiagA; 2881 PetscInt r; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2886 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2887 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2888 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2889 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2890 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2891 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2892 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2893 for(r = 0; r < n; ++r) { 2894 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2895 a[r] = diagA[r]; 2896 idx[r] = cstart + diagIdx[r]; 2897 } else { 2898 a[r] = offdiagA[r]; 2899 idx[r] = cmap[offdiagIdx[r]]; 2900 } 2901 } 2902 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2903 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2905 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2906 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2907 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2908 PetscFunctionReturn(0); 2909 } 2910 2911 #undef __FUNCT__ 2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2914 { 2915 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2916 PetscInt n = A->rmap->n; 2917 PetscInt cstart = A->cmap->rstart; 2918 PetscInt *cmap = mat->garray; 2919 PetscInt *diagIdx, *offdiagIdx; 2920 Vec diagV, offdiagV; 2921 PetscScalar *a, *diagA, *offdiagA; 2922 PetscInt r; 2923 PetscErrorCode ierr; 2924 2925 PetscFunctionBegin; 2926 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2927 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2928 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr); 2929 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2930 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2931 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2932 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2933 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2934 for(r = 0; r < n; ++r) { 2935 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2936 a[r] = diagA[r]; 2937 idx[r] = cstart + diagIdx[r]; 2938 } else { 2939 a[r] = offdiagA[r]; 2940 idx[r] = cmap[offdiagIdx[r]]; 2941 } 2942 } 2943 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2944 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2945 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2946 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2947 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2948 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2954 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2955 { 2956 PetscErrorCode ierr; 2957 Mat *dummy; 2958 2959 PetscFunctionBegin; 2960 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2961 *newmat = *dummy; 2962 ierr = PetscFree(dummy);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2970 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2971 { 2972 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 2981 /* -------------------------------------------------------------------*/ 2982 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2983 
                                       /* positional function table for MATMPIAIJ; the
                                          slot comments give the MatOps index and MUST
                                          stay in order */
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#ifdef PETSC_HAVE_PBGL
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetBlockSize_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
                                       MatSetValuesAdic_MPIAIJ,
#else
                                       0,
#endif
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_Basic,
                                       MatPtAPSymbolic_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                       MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonZeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Saves the numerical values of both sequential blocks (see MatStoreValues()). */
PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the values previously saved by MatStoreValues_MPIAIJ(). */
PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Validates the per-row nonzero estimates and preallocates the two sequential
   blocks (diagonal b->A, off-diagonal b->B) of the MPIAIJ matrix B.
   d_nz/o_nz are uniform per-row counts; d_nnz/o_nnz optional per-row arrays.
*/
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  /* default estimates when the caller passes PETSC_DEFAULT/PETSC_DECIDE */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    /* off-diagonal block spans ALL global columns (compacted later via garray) */
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Creates a deep copy of matin (values copied per cpvalues), duplicating the
   two sequential blocks, the column map, garray, the local work vector, and
   the scatter context.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer: rank 0 reads the header,
   row lengths, column indices and values, shipping each rank its share.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens =
PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3288 PetscInt cend,cstart,n,*rowners,sizesset=1; 3289 int fd; 3290 3291 PetscFunctionBegin; 3292 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3293 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3294 if (!rank) { 3295 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3296 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3297 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3298 } 3299 3300 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3301 3302 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3303 M = header[1]; N = header[2]; 3304 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3305 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3306 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3307 3308 /* If global sizes are set, check if they are consistent with that given in the file */ 3309 if (sizesset) { 3310 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3311 } 3312 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3313 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3314 3315 /* determine ownership of all rows */ 3316 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3317 else m = newMat->rmap->n; /* Set by user */ 3318 3319 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3320 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3321 3322 /* First process needs enough room for process with most rows */ 3323 if (!rank) { 3324 mmax = 
rowners[1]; 3325 for (i=2; i<size; i++) { 3326 mmax = PetscMax(mmax,rowners[i]); 3327 } 3328 } else mmax = m; 3329 3330 rowners[0] = 0; 3331 for (i=2; i<=size; i++) { 3332 rowners[i] += rowners[i-1]; 3333 } 3334 rstart = rowners[rank]; 3335 rend = rowners[rank+1]; 3336 3337 /* distribute row lengths to all processors */ 3338 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3339 if (!rank) { 3340 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3341 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3342 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3343 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3344 for (j=0; j<m; j++) { 3345 procsnz[0] += ourlens[j]; 3346 } 3347 for (i=1; i<size; i++) { 3348 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3349 /* calculate the number of nonzeros on each processor */ 3350 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3351 procsnz[i] += rowlengths[j]; 3352 } 3353 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3354 } 3355 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3356 } else { 3357 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3358 } 3359 3360 if (!rank) { 3361 /* determine max buffer needed and allocate it */ 3362 maxnz = 0; 3363 for (i=0; i<size; i++) { 3364 maxnz = PetscMax(maxnz,procsnz[i]); 3365 } 3366 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3367 3368 /* read in my part of the matrix column indices */ 3369 nz = procsnz[0]; 3370 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3371 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3372 3373 /* read in every one elses and ship off */ 3374 for (i=1; i<size; i++) { 3375 nz = procsnz[i]; 3376 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3377 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3378 } 3379 
ierr = PetscFree(cols);CHKERRQ(ierr); 3380 } else { 3381 /* determine buffer space needed for message */ 3382 nz = 0; 3383 for (i=0; i<m; i++) { 3384 nz += ourlens[i]; 3385 } 3386 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3387 3388 /* receive message of column indices*/ 3389 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3390 } 3391 3392 /* determine column ownership if matrix is not square */ 3393 if (N != M) { 3394 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3395 else n = newMat->cmap->n; 3396 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3397 cstart = cend - n; 3398 } else { 3399 cstart = rstart; 3400 cend = rend; 3401 n = cend - cstart; 3402 } 3403 3404 /* loop over local rows, determining number of off diagonal entries */ 3405 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3406 jj = 0; 3407 for (i=0; i<m; i++) { 3408 for (j=0; j<ourlens[i]; j++) { 3409 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3410 jj++; 3411 } 3412 } 3413 3414 for (i=0; i<m; i++) { 3415 ourlens[i] -= offlens[i]; 3416 } 3417 if (!sizesset) { 3418 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3419 } 3420 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3421 3422 for (i=0; i<m; i++) { 3423 ourlens[i] += offlens[i]; 3424 } 3425 3426 if (!rank) { 3427 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3428 3429 /* read in my part of the matrix numerical values */ 3430 nz = procsnz[0]; 3431 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3432 3433 /* insert into matrix */ 3434 jj = rstart; 3435 smycols = mycols; 3436 svals = vals; 3437 for (i=0; i<m; i++) { 3438 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3439 smycols += ourlens[i]; 3440 svals += ourlens[i]; 3441 jj++; 3442 } 3443 3444 /* read in other processors and ship out */ 3445 for (i=1; i<size; i++) { 3446 nz = 
procsnz[i]; 3447 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3448 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3449 } 3450 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3451 } else { 3452 /* receive numeric values */ 3453 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3454 3455 /* receive message of values*/ 3456 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3457 3458 /* insert into matrix */ 3459 jj = rstart; 3460 smycols = mycols; 3461 svals = vals; 3462 for (i=0; i<m; i++) { 3463 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3464 smycols += ourlens[i]; 3465 svals += ourlens[i]; 3466 jj++; 3467 } 3468 } 3469 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3470 ierr = PetscFree(vals);CHKERRQ(ierr); 3471 ierr = PetscFree(mycols);CHKERRQ(ierr); 3472 ierr = PetscFree(rowners);CHKERRQ(ierr); 3473 3474 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3475 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3476 PetscFunctionReturn(0); 3477 } 3478 3479 #undef __FUNCT__ 3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3482 { 3483 PetscErrorCode ierr; 3484 IS iscol_local; 3485 PetscInt csize; 3486 3487 PetscFunctionBegin; 3488 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3489 if (call == MAT_REUSE_MATRIX) { 3490 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3491 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3492 } else { 3493 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3494 } 3495 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3496 if (call == MAT_INITIAL_MATRIX) { 3497 ierr = 
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse path: the sequential submatrix saved on the previous call is
       recovered from the composed "SubMatrix" object and refilled in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* initial path: MatGetSubMatrices() allocates the array holding the single
       sequential submatrix; keep the matrix, free the array wrapper */
    ierr    = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse  = *local;
    ierr    = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
       Determine the number of non-zeros in the diagonal and off-diagonal
       portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* rectangular: spread the columns as evenly as possible over ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's [rstart,rend) column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;    /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
      The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* copy the sequential submatrix row by row into the parallel result */
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj;  jj += nz;
    vwork = aa;  aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* the compose takes a reference, so the local destroy leaves it alive on M */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/* Implementation of MatMPIAIJSetPreallocationCSR(): preallocates B from local
   CSR arrays (Ii,J) and inserts the optional values v (zeros when v is NULL). */
PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

/* NOTE(review): PETSc's debug guard is normally spelled PETSC_USE_DEBUG; as
   written, PETSC_USE_DEBUGGING is never defined so this validation block is
   always compiled out -- confirm the intended macro name before enabling. */
#if defined(PETSC_USE_DEBUGGING)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
3659 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3660 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3661 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3662 } 3663 #endif 3664 3665 for (i=0; i<m; i++) { 3666 nnz = Ii[i+1]- Ii[i]; 3667 JJ = J + Ii[i]; 3668 nnz_max = PetscMax(nnz_max,nnz); 3669 d = 0; 3670 for (j=0; j<nnz; j++) { 3671 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3672 } 3673 d_nnz[i] = d; 3674 o_nnz[i] = nnz - d; 3675 } 3676 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3677 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3678 3679 if (v) values = (PetscScalar*)v; 3680 else { 3681 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3682 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3683 } 3684 3685 for (i=0; i<m; i++) { 3686 ii = i + rstart; 3687 nnz = Ii[i+1]- Ii[i]; 3688 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3689 } 3690 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3691 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 3693 if (!v) { 3694 ierr = PetscFree(values);CHKERRQ(ierr); 3695 } 3696 PetscFunctionReturn(0); 3697 } 3698 EXTERN_C_END 3699 3700 #undef __FUNCT__ 3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3702 /*@ 3703 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3704 (the default parallel PETSc format). 3705 3706 Collective on MPI_Comm 3707 3708 Input Parameters: 3709 + B - the matrix 3710 . i - the indices into j for the start of each local row (starts with zero) 3711 . 
j - the column indices for each local row (starts with zero) 3712 - v - optional values in the matrix 3713 3714 Level: developer 3715 3716 Notes: 3717 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3718 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3719 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3720 3721 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3722 3723 The format which is used for the sparse matrix input, is equivalent to a 3724 row-major ordering.. i.e for the following matrix, the input data expected is 3725 as shown: 3726 3727 1 0 0 3728 2 0 3 P0 3729 ------- 3730 4 5 6 P1 3731 3732 Process0 [P0]: rows_owned=[0,1] 3733 i = {0,1,3} [size = nrow+1 = 2+1] 3734 j = {0,0,2} [size = nz = 6] 3735 v = {1,2,3} [size = nz = 6] 3736 3737 Process1 [P1]: rows_owned=[2] 3738 i = {0,3} [size = nrow+1 = 1+1] 3739 j = {0,1,2} [size = nz = 6] 3740 v = {4,5,6} [size = nz = 6] 3741 3742 .keywords: matrix, aij, compressed row, sparse, parallel 3743 3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 3745 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3746 @*/ 3747 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3748 { 3749 PetscErrorCode ierr; 3750 3751 PetscFunctionBegin; 3752 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3753 PetscFunctionReturn(0); 3754 } 3755 3756 #undef __FUNCT__ 3757 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3758 /*@C 3759 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3760 (the default parallel PETSc format). 
For good matrix assembly performance 3761 the user should preallocate the matrix storage by setting the parameters 3762 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3763 performance can be increased by more than a factor of 50. 3764 3765 Collective on MPI_Comm 3766 3767 Input Parameters: 3768 + A - the matrix 3769 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3770 (same value is used for all local rows) 3771 . d_nnz - array containing the number of nonzeros in the various rows of the 3772 DIAGONAL portion of the local submatrix (possibly different for each row) 3773 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3774 The size of this array is equal to the number of local rows, i.e 'm'. 3775 You must leave room for the diagonal entry even if it is zero. 3776 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3777 submatrix (same value is used for all local rows). 3778 - o_nnz - array containing the number of nonzeros in the various rows of the 3779 OFF-DIAGONAL portion of the local submatrix (possibly different for 3780 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3781 structure. The size of this array is equal to the number 3782 of local rows, i.e 'm'. 3783 3784 If the *_nnz parameter is given then the *_nz parameter is ignored 3785 3786 The AIJ format (also called the Yale sparse matrix format or 3787 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3788 storage. The stored row and column indices begin with zero. 3789 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3790 3791 The parallel matrix is partitioned such that the first m0 rows belong to 3792 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3793 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3794 3795 The DIAGONAL portion of the local submatrix of a processor can be defined 3796 as the submatrix which is obtained by extraction the part corresponding to 3797 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3798 first row that belongs to the processor, r2 is the last row belonging to 3799 the this processor, and c1-c2 is range of indices of the local part of a 3800 vector suitable for applying the matrix to. This is an mxn matrix. In the 3801 common case of a square matrix, the row and column ranges are the same and 3802 the DIAGONAL part is also square. The remaining portion of the local 3803 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3804 3805 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3806 3807 You can call MatGetInfo() to get information on how effective the preallocation was; 3808 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3809 You can also run with the option -info and look for messages with the string 3810 malloc in them to see if additional memory allocation was needed. 3811 3812 Example usage: 3813 3814 Consider the following 8x8 matrix with 34 non-zero values, that is 3815 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3816 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3817 as follows: 3818 3819 .vb 3820 1 2 0 | 0 3 0 | 0 4 3821 Proc0 0 5 6 | 7 0 0 | 8 0 3822 9 0 10 | 11 0 0 | 12 0 3823 ------------------------------------- 3824 13 0 14 | 15 16 17 | 0 0 3825 Proc1 0 18 0 | 19 20 21 | 0 0 3826 0 0 0 | 22 23 0 | 24 0 3827 ------------------------------------- 3828 Proc2 25 26 27 | 0 0 28 | 29 0 3829 30 0 0 | 31 32 33 | 0 34 3830 .ve 3831 3832 This can be represented as a collection of submatrices as: 3833 3834 .vb 3835 A B C 3836 D E F 3837 G H I 3838 .ve 3839 3840 Where the submatrices A,B,C are owned by proc0, D,E,F are 3841 owned by proc1, G,H,I are owned by proc2. 

    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
    The 'M','N' parameters are 8,8, and have the same values on all procs.

    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
    matrix, and [DF] as another SeqAIJ matrix.

    When d_nz, o_nz parameters are specified, d_nz storage elements are
    allocated for every row of the local diagonal submatrix, and o_nz
    storage locations are allocated for every row of the OFF-DIAGONAL submat.
    One way to choose d_nz and o_nz is to use the max nonzeros per local
    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
    In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
    for proc2. i.e we are using 12+15+10=37 storage locations to store
    34 values.

    When d_nnz, o_nnz parameters are specified, the storage is specified
    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
    In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
    Here the space allocated is sum of all the above values i.e 34, and
    hence pre-allocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to the type-specific preallocation routine registered under
     "MatMPIAIJSetPreallocation_C"; silently does nothing for other types */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.
3977 3978 Collective on MPI_Comm 3979 3980 Input Parameters: 3981 + comm - MPI communicator 3982 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3983 This value should be the same as the local size used in creating the 3984 y vector for the matrix-vector product y = Ax. 3985 . n - This value should be the same as the local size used in creating the 3986 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3987 calculated if N is given) For square matrices n is almost always m. 3988 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3989 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3990 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3991 (same value is used for all local rows) 3992 . d_nnz - array containing the number of nonzeros in the various rows of the 3993 DIAGONAL portion of the local submatrix (possibly different for each row) 3994 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3995 The size of this array is equal to the number of local rows, i.e 'm'. 3996 You must leave room for the diagonal entry even if it is zero. 3997 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3998 submatrix (same value is used for all local rows). 3999 - o_nnz - array containing the number of nonzeros in the various rows of the 4000 OFF-DIAGONAL portion of the local submatrix (possibly different for 4001 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4002 structure. The size of this array is equal to the number 4003 of local rows, i.e 'm'. 4004 4005 Output Parameter: 4006 . A - the matrix 4007 4008 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4009 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
4010 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4011 4012 Notes: 4013 If the *_nnz parameter is given then the *_nz parameter is ignored 4014 4015 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4016 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4017 storage requirements for this matrix. 4018 4019 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4020 processor than it must be used on all processors that share the object for 4021 that argument. 4022 4023 The user MUST specify either the local or global matrix dimensions 4024 (possibly both). 4025 4026 The parallel matrix is partitioned across processors such that the 4027 first m0 rows belong to process 0, the next m1 rows belong to 4028 process 1, the next m2 rows belong to process 2 etc.. where 4029 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4030 values corresponding to [m x N] submatrix. 4031 4032 The columns are logically partitioned with the n0 columns belonging 4033 to 0th partition, the next n1 columns belonging to the next 4034 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4035 4036 The DIAGONAL portion of the local submatrix on any given processor 4037 is the submatrix corresponding to the rows and columns m,n 4038 corresponding to the given processor. i.e diagonal matrix on 4039 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4040 etc. The remaining portion of the local submatrix [m x (N-n)] 4041 constitute the OFF-DIAGONAL portion. The example below better 4042 illustrates this concept. 4043 4044 For a square global matrix we define each processor's diagonal portion 4045 to be its local rows and the corresponding columns (a square submatrix); 4046 each processor's off-diagonal portion encompasses the remainder of the 4047 local matrix (a rectangular submatrix). 
4048 4049 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4050 4051 When calling this routine with a single process communicator, a matrix of 4052 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4053 type of communicator, use the construction mechanism: 4054 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4055 4056 By default, this format uses inodes (identical nodes) when possible. 4057 We search for consecutive rows with the same nonzero structure, thereby 4058 reusing matrix information to achieve increased efficiency. 4059 4060 Options Database Keys: 4061 + -mat_no_inode - Do not use inodes 4062 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4063 - -mat_aij_oneindex - Internally use indexing starting at 1 4064 rather than 0. Note that when calling MatSetValues(), 4065 the user still MUST index entries starting at 0! 4066 4067 4068 Example usage: 4069 4070 Consider the following 8x8 matrix with 34 non-zero values, that is 4071 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4072 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4073 as follows: 4074 4075 .vb 4076 1 2 0 | 0 3 0 | 0 4 4077 Proc0 0 5 6 | 7 0 0 | 8 0 4078 9 0 10 | 11 0 0 | 12 0 4079 ------------------------------------- 4080 13 0 14 | 15 16 17 | 0 0 4081 Proc1 0 18 0 | 19 20 21 | 0 0 4082 0 0 0 | 22 23 0 | 24 0 4083 ------------------------------------- 4084 Proc2 25 26 27 | 0 0 28 | 29 0 4085 30 0 0 | 31 32 33 | 0 34 4086 .ve 4087 4088 This can be represented as a collection of submatrices as: 4089 4090 .vb 4091 A B C 4092 D E F 4093 G H I 4094 .ve 4095 4096 Where the submatrices A,B,C are owned by proc0, D,E,F are 4097 owned by proc1, G,H,I are owned by proc2. 4098 4099 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4100 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a single-process communicator a SEQAIJ matrix is created instead (see Notes) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/* MatMPIAIJGetSeqAIJ - returns the diagonal block (Ad), the off-diagonal block (Ao),
   and the garray mapping of local off-diagonal columns to global columns.
   The outputs are references to A's internal objects, not copies; the caller
   must not destroy them. */
PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* MatSetColoring_MPIAIJ - propagates a coloring to the diagonal (a->A) and
   off-diagonal (a->B) SeqAIJ blocks.  For IS_COLORING_GLOBAL the colors are
   gathered from all ranks so off-diagonal (ghost) columns can be looked up via
   garray; for IS_COLORING_GHOSTED the global column numbers are translated to
   local indices through A->cmap->mapping first. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      /* garray[i] is the global column of local off-diagonal column i */
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC derivative values to both the diagonal and off-diagonal blocks. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forwards ADIFOR derivative values (nl directional derivatives) to both blocks. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeSymbolic"
/* MatMergeSymbolic - symbolic phase of MatMerge(): creates and preallocates the
   parallel MPIAIJ matrix that will hold the stacked per-process sequential
   matrices.  n may be PETSC_DECIDE, in which case the column layout is split
   with PetscSplitOwnership(). */
PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE){
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* prefix-sum of local row counts gives this rank's first global row */
  ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeNumeric"
/* MatMergeNumeric - numeric phase of MatMerge(): copies the rows of the local
   sequential matrix into the already-preallocated parallel matrix and assembles
   it.  Note: inmat is DESTROYED by this routine once its values are copied. */
PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
.
n - number of local columns (or PETSC_DECIDE) 4334 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4335 4336 Output Parameter: 4337 . outmat - the parallel matrix generated 4338 4339 Level: advanced 4340 4341 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4342 4343 @*/ 4344 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4345 { 4346 PetscErrorCode ierr; 4347 4348 PetscFunctionBegin; 4349 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4350 if (scall == MAT_INITIAL_MATRIX){ 4351 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4352 } 4353 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4354 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatFileSplit" 4360 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4361 { 4362 PetscErrorCode ierr; 4363 PetscMPIInt rank; 4364 PetscInt m,N,i,rstart,nnz; 4365 size_t len; 4366 const PetscInt *indx; 4367 PetscViewer out; 4368 char *name; 4369 Mat B; 4370 const PetscScalar *values; 4371 4372 PetscFunctionBegin; 4373 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4374 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4375 /* Should this be the type of the diagonal block of A? 
*/ 4376 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4377 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4378 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4379 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4380 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4381 for (i=0;i<m;i++) { 4382 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4383 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4384 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4385 } 4386 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4387 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4388 4389 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4390 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4391 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4392 sprintf(name,"%s.%d",outfile,rank); 4393 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4394 ierr = PetscFree(name); 4395 ierr = MatView(B,out);CHKERRQ(ierr); 4396 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4397 ierr = MatDestroy(&B);CHKERRQ(ierr); 4398 PetscFunctionReturn(0); 4399 } 4400 4401 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4402 #undef __FUNCT__ 4403 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4404 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4405 { 4406 PetscErrorCode ierr; 4407 Mat_Merge_SeqsToMPI *merge; 4408 PetscContainer container; 4409 4410 PetscFunctionBegin; 4411 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4412 if (container) { 4413 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4414 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4419 ierr = 
PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4426 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4427 ierr = PetscFree(merge);CHKERRQ(ierr); 4428 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4429 } 4430 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4431 PetscFunctionReturn(0); 4432 } 4433 4434 #include <../src/mat/utils/freespace.h> 4435 #include <petscbt.h> 4436 4437 #undef __FUNCT__ 4438 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4439 /*@C 4440 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4441 matrices from each processor 4442 4443 Collective on MPI_Comm 4444 4445 Input Parameters: 4446 + comm - the communicators the parallel matrix will live on 4447 . seqmat - the input sequential matrices 4448 . m - number of local rows (or PETSC_DECIDE) 4449 . n - number of local columns (or PETSC_DECIDE) 4450 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4451 4452 Output Parameter: 4453 . mpimat - the parallel matrix generated 4454 4455 Level: advanced 4456 4457 Notes: 4458 The dimensions of the sequential matrix in each processor MUST be the same. 4459 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4460 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4461 @*/ 4462 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 4463 { 4464 PetscErrorCode ierr; 4465 MPI_Comm comm=((PetscObject)mpimat)->comm; 4466 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4467 PetscMPIInt size,rank,taga,*len_s; 4468 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4469 PetscInt proc,m; 4470 PetscInt **buf_ri,**buf_rj; 4471 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4472 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4473 MPI_Request *s_waits,*r_waits; 4474 MPI_Status *status; 4475 MatScalar *aa=a->a; 4476 MatScalar **abuf_r,*ba_i; 4477 Mat_Merge_SeqsToMPI *merge; 4478 PetscContainer container; 4479 4480 PetscFunctionBegin; 4481 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4482 4483 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4484 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4485 4486 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4487 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4488 4489 bi = merge->bi; 4490 bj = merge->bj; 4491 buf_ri = merge->buf_ri; 4492 buf_rj = merge->buf_rj; 4493 4494 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4495 owners = merge->rowmap->range; 4496 len_s = merge->len_s; 4497 4498 /* send and recv matrix values */ 4499 /*-----------------------------*/ 4500 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4501 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4502 4503 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4504 for (proc=0,k=0; proc<size; proc++){ 4505 if (!len_s[proc]) continue; 4506 i = owners[proc]; 4507 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4508 k++; 4509 } 4510 4511 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4512 if (merge->nsend) 
{ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4513 ierr = PetscFree(status);CHKERRQ(ierr); 4514 4515 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4516 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4517 4518 /* insert mat values of mpimat */ 4519 /*----------------------------*/ 4520 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4521 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4522 4523 for (k=0; k<merge->nrecv; k++){ 4524 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4525 nrows = *(buf_ri_k[k]); 4526 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4527 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4528 } 4529 4530 /* set values of ba */ 4531 m = merge->rowmap->n; 4532 for (i=0; i<m; i++) { 4533 arow = owners[rank] + i; 4534 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4535 bnzi = bi[i+1] - bi[i]; 4536 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4537 4538 /* add local non-zero vals of this proc's seqmat into ba */ 4539 anzi = ai[arow+1] - ai[arow]; 4540 aj = a->j + ai[arow]; 4541 aa = a->a + ai[arow]; 4542 nextaj = 0; 4543 for (j=0; nextaj<anzi; j++){ 4544 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4545 ba_i[j] += aa[nextaj++]; 4546 } 4547 } 4548 4549 /* add received vals into ba */ 4550 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4551 /* i-th row */ 4552 if (i == *nextrow[k]) { 4553 anzi = *(nextai[k]+1) - *nextai[k]; 4554 aj = buf_rj[k] + *(nextai[k]); 4555 aa = abuf_r[k] + *(nextai[k]); 4556 nextaj = 0; 4557 for (j=0; nextaj<anzi; j++){ 4558 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4559 ba_i[j] += aa[nextaj++]; 4560 } 4561 } 4562 nextrow[k]++; nextai[k]++; 4563 } 4564 } 4565 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4566 } 4567 ierr = 
MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4568 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4569 4570 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4571 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4572 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4573 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4574 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4575 PetscFunctionReturn(0); 4576 } 4577 4578 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4579 4580 #undef __FUNCT__ 4581 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 4582 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4583 { 4584 PetscErrorCode ierr; 4585 Mat B_mpi; 4586 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4587 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4588 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4589 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4590 PetscInt len,proc,*dnz,*onz; 4591 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4592 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4593 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4594 MPI_Status *status; 4595 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4596 PetscBT lnkbt; 4597 Mat_Merge_SeqsToMPI *merge; 4598 PetscContainer container; 4599 4600 PetscFunctionBegin; 4601 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4602 4603 /* make sure it is a PETSc comm */ 4604 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4605 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4606 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4607 4608 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4609 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4610 4611 /* determine row ownership */ 4612 /*---------------------------------------------------------*/ 4613 ierr = 
PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4614 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4615 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4616 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4617 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4618 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4619 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4620 4621 m = merge->rowmap->n; 4622 M = merge->rowmap->N; 4623 owners = merge->rowmap->range; 4624 4625 /* determine the number of messages to send, their lengths */ 4626 /*---------------------------------------------------------*/ 4627 len_s = merge->len_s; 4628 4629 len = 0; /* length of buf_si[] */ 4630 merge->nsend = 0; 4631 for (proc=0; proc<size; proc++){ 4632 len_si[proc] = 0; 4633 if (proc == rank){ 4634 len_s[proc] = 0; 4635 } else { 4636 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4637 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4638 } 4639 if (len_s[proc]) { 4640 merge->nsend++; 4641 nrows = 0; 4642 for (i=owners[proc]; i<owners[proc+1]; i++){ 4643 if (ai[i+1] > ai[i]) nrows++; 4644 } 4645 len_si[proc] = 2*(nrows+1); 4646 len += len_si[proc]; 4647 } 4648 } 4649 4650 /* determine the number and length of messages to receive for ij-structure */ 4651 /*-------------------------------------------------------------------------*/ 4652 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4653 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4654 4655 /* post the Irecv of j-structure */ 4656 /*-------------------------------*/ 4657 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4658 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4659 4660 /* post the Isend of j-structure */ 
4661 /*--------------------------------*/ 4662 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4663 4664 for (proc=0, k=0; proc<size; proc++){ 4665 if (!len_s[proc]) continue; 4666 i = owners[proc]; 4667 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4668 k++; 4669 } 4670 4671 /* receives and sends of j-structure are complete */ 4672 /*------------------------------------------------*/ 4673 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4674 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4675 4676 /* send and recv i-structure */ 4677 /*---------------------------*/ 4678 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4679 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4680 4681 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4682 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4683 for (proc=0,k=0; proc<size; proc++){ 4684 if (!len_s[proc]) continue; 4685 /* form outgoing message for i-structure: 4686 buf_si[0]: nrows to be sent 4687 [1:nrows]: row index (global) 4688 [nrows+1:2*nrows+1]: i-structure index 4689 */ 4690 /*-------------------------------------------*/ 4691 nrows = len_si[proc]/2 - 1; 4692 buf_si_i = buf_si + nrows+1; 4693 buf_si[0] = nrows; 4694 buf_si_i[0] = 0; 4695 nrows = 0; 4696 for (i=owners[proc]; i<owners[proc+1]; i++){ 4697 anzi = ai[i+1] - ai[i]; 4698 if (anzi) { 4699 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4700 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4701 nrows++; 4702 } 4703 } 4704 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4705 k++; 4706 buf_si += len_si[proc]; 4707 } 4708 4709 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4710 if (merge->nsend) {ierr 
= MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4711 4712 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4713 for (i=0; i<merge->nrecv; i++){ 4714 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4715 } 4716 4717 ierr = PetscFree(len_si);CHKERRQ(ierr); 4718 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4719 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4720 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4721 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4722 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4723 ierr = PetscFree(status);CHKERRQ(ierr); 4724 4725 /* compute a local seq matrix in each processor */ 4726 /*----------------------------------------------*/ 4727 /* allocate bi array and free space for accumulating nonzero column info */ 4728 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4729 bi[0] = 0; 4730 4731 /* create and initialize a linked list */ 4732 nlnk = N+1; 4733 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4734 4735 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4736 len = 0; 4737 len = ai[owners[rank+1]] - ai[owners[rank]]; 4738 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4739 current_space = free_space; 4740 4741 /* determine symbolic info for each local row */ 4742 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4743 4744 for (k=0; k<merge->nrecv; k++){ 4745 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4746 nrows = *buf_ri_k[k]; 4747 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4748 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4749 } 4750 4751 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4752 len = 0; 4753 for (i=0;i<m;i++) { 4754 bnzi = 0; 4755 /* 
add local non-zero cols of this proc's seqmat into lnk */ 4756 arow = owners[rank] + i; 4757 anzi = ai[arow+1] - ai[arow]; 4758 aj = a->j + ai[arow]; 4759 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4760 bnzi += nlnk; 4761 /* add received col data into lnk */ 4762 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4763 if (i == *nextrow[k]) { /* i-th row */ 4764 anzi = *(nextai[k]+1) - *nextai[k]; 4765 aj = buf_rj[k] + *nextai[k]; 4766 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4767 bnzi += nlnk; 4768 nextrow[k]++; nextai[k]++; 4769 } 4770 } 4771 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4772 4773 /* if free space is not available, make more free space */ 4774 if (current_space->local_remaining<bnzi) { 4775 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4776 nspacedouble++; 4777 } 4778 /* copy data into free space, then initialize lnk */ 4779 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4780 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4781 4782 current_space->array += bnzi; 4783 current_space->local_used += bnzi; 4784 current_space->local_remaining -= bnzi; 4785 4786 bi[i+1] = bi[i] + bnzi; 4787 } 4788 4789 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4790 4791 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4792 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4793 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4794 4795 /* create symbolic parallel matrix B_mpi */ 4796 /*---------------------------------------*/ 4797 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4798 if (n==PETSC_DECIDE) { 4799 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4800 } else { 4801 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4802 } 4803 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4804 ierr = 
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4805 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4806 4807 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 4808 B_mpi->assembled = PETSC_FALSE; 4809 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4810 merge->bi = bi; 4811 merge->bj = bj; 4812 merge->buf_ri = buf_ri; 4813 merge->buf_rj = buf_rj; 4814 merge->coi = PETSC_NULL; 4815 merge->coj = PETSC_NULL; 4816 merge->owners_co = PETSC_NULL; 4817 4818 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4819 4820 /* attach the supporting struct to B_mpi for reuse */ 4821 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4822 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4823 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4824 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4825 *mpimat = B_mpi; 4826 4827 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4828 PetscFunctionReturn(0); 4829 } 4830 4831 #undef __FUNCT__ 4832 #define __FUNCT__ "MatMerge_SeqsToMPI" 4833 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4834 { 4835 PetscErrorCode ierr; 4836 4837 PetscFunctionBegin; 4838 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4839 if (scall == MAT_INITIAL_MATRIX){ 4840 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4841 } 4842 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 4843 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4844 PetscFunctionReturn(0); 4845 } 4846 4847 #undef __FUNCT__ 4848 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4849 /*@ 4850 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4851 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4852 with MatGetSize() 4853 4854 Not Collective 4855 4856 Input Parameters: 4857 + A - the matrix 4858 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4859 4860 Output Parameter: 4861 . A_loc - the local sequential matrix generated 4862 4863 Level: developer 4864 4865 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4866 4867 @*/ 4868 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4869 { 4870 PetscErrorCode ierr; 4871 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4872 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4873 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4874 MatScalar *aa=a->a,*ba=b->a,*cam; 4875 PetscScalar *ca; 4876 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4877 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4878 PetscBool match; 4879 4880 PetscFunctionBegin; 4881 ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4882 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4883 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4884 if (scall == MAT_INITIAL_MATRIX){ 4885 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4886 ci[0] = 0; 4887 for (i=0; i<am; i++){ 4888 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4889 } 4890 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4891 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4892 k = 0; 4893 for (i=0; i<am; i++) { 4894 ncols_o = bi[i+1] - bi[i]; 4895 ncols_d = ai[i+1] - ai[i]; 4896 /* off-diagonal portion of A */ 4897 for (jo=0; jo<ncols_o; jo++) { 4898 col = cmap[*bj]; 4899 if (col >= cstart) break; 4900 cj[k] = col; bj++; 4901 ca[k++] = *ba++; 4902 } 4903 /* diagonal portion of A */ 4904 for (j=0; j<ncols_d; j++) { 4905 cj[k] = cstart + *aj++; 4906 ca[k++] = 
*aa++; 4907 } 4908 /* off-diagonal portion of A */ 4909 for (j=jo; j<ncols_o; j++) { 4910 cj[k] = cmap[*bj++]; 4911 ca[k++] = *ba++; 4912 } 4913 } 4914 /* put together the new matrix */ 4915 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4916 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4917 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4918 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4919 mat->free_a = PETSC_TRUE; 4920 mat->free_ij = PETSC_TRUE; 4921 mat->nonew = 0; 4922 } else if (scall == MAT_REUSE_MATRIX){ 4923 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4924 ci = mat->i; cj = mat->j; cam = mat->a; 4925 for (i=0; i<am; i++) { 4926 /* off-diagonal portion of A */ 4927 ncols_o = bi[i+1] - bi[i]; 4928 for (jo=0; jo<ncols_o; jo++) { 4929 col = cmap[*bj]; 4930 if (col >= cstart) break; 4931 *cam++ = *ba++; bj++; 4932 } 4933 /* diagonal portion of A */ 4934 ncols_d = ai[i+1] - ai[i]; 4935 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4936 /* off-diagonal portion of A */ 4937 for (j=jo; j<ncols_o; j++) { 4938 *cam++ = *ba++; bj++; 4939 } 4940 } 4941 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4942 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4943 PetscFunctionReturn(0); 4944 } 4945 4946 #undef __FUNCT__ 4947 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4948 /*@C 4949 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4950 4951 Not Collective 4952 4953 Input Parameters: 4954 + A - the matrix 4955 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4956 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 4957 4958 Output Parameter: 4959 . 
A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  /* Row index set: caller-provided, or all locally owned rows */
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  /* Column index set: caller-provided, or the nonzero columns of the local
     diagonal and off-diagonal blocks, merged in ascending global order.
     a->garray maps local off-diagonal columns to (sorted) global indices. */
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    /* off-diagonal columns that precede the diagonal block */
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* columns of the diagonal block */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    /* remaining off-diagonal columns (those past the diagonal block) */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  /* For reuse, MatGetSubMatrices() expects an array holding the existing matrix;
     for MAT_INITIAL_MATRIX it allocates the array itself. */
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  /* destroy only the index sets we created here */
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  /* rows of B extracted on this process must line up with the local columns of A */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* Build the sorted list of global row indices of B that correspond to
       nonzero columns of local A (diagonal plus off-diagonal blocks). */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() reuse path expects an array holding the existing matrix */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* either hand the index sets back to the caller or destroy them */
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.
startsj_r - similar to startsj for receives 5105 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5106 5107 Output Parameter: 5108 + B_oth - the sequential matrix generated 5109 5110 Level: developer 5111 5112 @*/ 5113 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5114 { 5115 VecScatter_MPI_General *gen_to,*gen_from; 5116 PetscErrorCode ierr; 5117 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5118 Mat_SeqAIJ *b_oth; 5119 VecScatter ctx=a->Mvctx; 5120 MPI_Comm comm=((PetscObject)ctx)->comm; 5121 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5122 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5123 PetscScalar *rvalues,*svalues; 5124 MatScalar *b_otha,*bufa,*bufA; 5125 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5126 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5127 MPI_Status *sstatus,rstatus; 5128 PetscMPIInt jj; 5129 PetscInt *cols,sbs,rbs; 5130 PetscScalar *vals; 5131 5132 PetscFunctionBegin; 5133 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5134 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5135 } 5136 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5137 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5138 5139 gen_to = (VecScatter_MPI_General*)ctx->todata; 5140 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5141 rvalues = gen_from->values; /* holds the length of receiving row */ 5142 svalues = gen_to->values; /* holds the length of sending row */ 5143 nrecvs = gen_from->n; 5144 nsends = gen_to->n; 5145 5146 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5147 srow = gen_to->indices; /* local row index to be sent */ 5148 sstarts = 
gen_to->starts; 5149 sprocs = gen_to->procs; 5150 sstatus = gen_to->sstatus; 5151 sbs = gen_to->bs; 5152 rstarts = gen_from->starts; 5153 rprocs = gen_from->procs; 5154 rbs = gen_from->bs; 5155 5156 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5157 if (scall == MAT_INITIAL_MATRIX){ 5158 /* i-array */ 5159 /*---------*/ 5160 /* post receives */ 5161 for (i=0; i<nrecvs; i++){ 5162 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5163 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5164 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5165 } 5166 5167 /* pack the outgoing message */ 5168 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5169 sstartsj[0] = 0; rstartsj[0] = 0; 5170 len = 0; /* total length of j or a array to be sent */ 5171 k = 0; 5172 for (i=0; i<nsends; i++){ 5173 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5174 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5175 for (j=0; j<nrows; j++) { 5176 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5177 for (l=0; l<sbs; l++){ 5178 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5179 rowlen[j*sbs+l] = ncols; 5180 len += ncols; 5181 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5182 } 5183 k++; 5184 } 5185 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5186 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5187 } 5188 /* recvs and sends of i-array are completed */ 5189 i = nrecvs; 5190 while (i--) { 5191 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5192 } 5193 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5194 5195 /* allocate buffers for sending j and a arrays */ 5196 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5197 ierr = 
PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5198 5199 /* create i-array of B_oth */ 5200 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5201 b_othi[0] = 0; 5202 len = 0; /* total length of j or a array to be received */ 5203 k = 0; 5204 for (i=0; i<nrecvs; i++){ 5205 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5206 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5207 for (j=0; j<nrows; j++) { 5208 b_othi[k+1] = b_othi[k] + rowlen[j]; 5209 len += rowlen[j]; k++; 5210 } 5211 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5212 } 5213 5214 /* allocate space for j and a arrrays of B_oth */ 5215 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5216 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5217 5218 /* j-array */ 5219 /*---------*/ 5220 /* post receives of j-array */ 5221 for (i=0; i<nrecvs; i++){ 5222 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5223 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5224 } 5225 5226 /* pack the outgoing message j-array */ 5227 k = 0; 5228 for (i=0; i<nsends; i++){ 5229 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5230 bufJ = bufj+sstartsj[i]; 5231 for (j=0; j<nrows; j++) { 5232 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5233 for (ll=0; ll<sbs; ll++){ 5234 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5235 for (l=0; l<ncols; l++){ 5236 *bufJ++ = cols[l]; 5237 } 5238 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5239 } 5240 } 5241 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5242 } 5243 5244 /* recvs and sends of j-array are completed */ 5245 i = nrecvs; 5246 while (i--) { 5247 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5248 } 5249 if 
(nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5250 } else if (scall == MAT_REUSE_MATRIX){ 5251 sstartsj = *startsj; 5252 rstartsj = *startsj_r; 5253 bufa = *bufa_ptr; 5254 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5255 b_otha = b_oth->a; 5256 } else { 5257 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5258 } 5259 5260 /* a-array */ 5261 /*---------*/ 5262 /* post receives of a-array */ 5263 for (i=0; i<nrecvs; i++){ 5264 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5265 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5266 } 5267 5268 /* pack the outgoing message a-array */ 5269 k = 0; 5270 for (i=0; i<nsends; i++){ 5271 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5272 bufA = bufa+sstartsj[i]; 5273 for (j=0; j<nrows; j++) { 5274 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5275 for (ll=0; ll<sbs; ll++){ 5276 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5277 for (l=0; l<ncols; l++){ 5278 *bufA++ = vals[l]; 5279 } 5280 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5281 } 5282 } 5283 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5284 } 5285 /* recvs and sends of a-array are completed */ 5286 i = nrecvs; 5287 while (i--) { 5288 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5289 } 5290 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5291 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5292 5293 if (scall == MAT_INITIAL_MATRIX){ 5294 /* put together the new matrix */ 5295 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5296 5297 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5298 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5299 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5300 b_oth->free_a = PETSC_TRUE; 5301 b_oth->free_ij = PETSC_TRUE; 5302 b_oth->nonew = 0; 5303 5304 ierr = PetscFree(bufj);CHKERRQ(ierr); 5305 if (!startsj || !bufa_ptr){ 5306 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5307 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5308 } else { 5309 *startsj = sstartsj; 5310 *startsj_r = rstartsj; 5311 *bufa_ptr = bufa; 5312 } 5313 } 5314 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5315 PetscFunctionReturn(0); 5316 } 5317 5318 #undef __FUNCT__ 5319 #define __FUNCT__ "MatGetCommunicationStructs" 5320 /*@C 5321 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5322 5323 Not Collective 5324 5325 Input Parameters: 5326 . A - The matrix in mpiaij format 5327 5328 Output Parameter: 5329 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5330 . 
colmap - A map from global column index to local index into lvec 5331 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5332 5333 Level: developer 5334 5335 @*/ 5336 #if defined (PETSC_USE_CTABLE) 5337 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5338 #else 5339 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5340 #endif 5341 { 5342 Mat_MPIAIJ *a; 5343 5344 PetscFunctionBegin; 5345 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5346 PetscValidPointer(lvec, 2); 5347 PetscValidPointer(colmap, 3); 5348 PetscValidPointer(multScatter, 4); 5349 a = (Mat_MPIAIJ *) A->data; 5350 if (lvec) *lvec = a->lvec; 5351 if (colmap) *colmap = a->colmap; 5352 if (multScatter) *multScatter = a->Mvctx; 5353 PetscFunctionReturn(0); 5354 } 5355 5356 EXTERN_C_BEGIN 5357 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5358 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5359 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5360 EXTERN_C_END 5361 5362 #undef __FUNCT__ 5363 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5364 /* 5365 Computes (B'*A')' since computing B*A directly is untenable 5366 5367 n p p 5368 ( ) ( ) ( ) 5369 m ( A ) * n ( B ) = m ( C ) 5370 ( ) ( ) ( ) 5371 5372 */ 5373 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5374 { 5375 PetscErrorCode ierr; 5376 Mat At,Bt,Ct; 5377 5378 PetscFunctionBegin; 5379 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5380 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5381 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5382 ierr = MatDestroy(&At);CHKERRQ(ierr); 5383 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5384 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5385 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5386 
PetscFunctionReturn(0); 5387 } 5388 5389 #undef __FUNCT__ 5390 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5391 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5392 { 5393 PetscErrorCode ierr; 5394 PetscInt m=A->rmap->n,n=B->cmap->n; 5395 Mat Cmat; 5396 5397 PetscFunctionBegin; 5398 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5399 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5400 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5401 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5402 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5403 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5404 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5405 *C = Cmat; 5406 PetscFunctionReturn(0); 5407 } 5408 5409 /* ----------------------------------------------------------------*/ 5410 #undef __FUNCT__ 5411 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5412 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5413 { 5414 PetscErrorCode ierr; 5415 5416 PetscFunctionBegin; 5417 if (scall == MAT_INITIAL_MATRIX){ 5418 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5419 } 5420 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5421 PetscFunctionReturn(0); 5422 } 5423 5424 EXTERN_C_BEGIN 5425 #if defined(PETSC_HAVE_MUMPS) 5426 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5427 #endif 5428 #if defined(PETSC_HAVE_PASTIX) 5429 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5430 #endif 5431 #if defined(PETSC_HAVE_SUPERLU_DIST) 5432 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5433 #endif 5434 #if defined(PETSC_HAVE_SPOOLES) 5435 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/* Constructor for the MPIAIJ matrix type: installs the function table,
   initializes the per-process data structure, and composes the dynamic
   functions (external factorizations, conversions, preallocation, ...). */
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs  = 1;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* compose external direct-solver factorizations when available */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                           "MatGetFactor_mpiaij_spooles",
                                           MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* compose type-specific operations looked up by name elsewhere in PETSc */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices for "diagonal" portion of matrix
.   j - column indices
.   a - matrix values
.   oi - row indices for "off-diagonal" portion of matrix
.   oj - column indices
-   oa - matrix values

   Output Parameter:
.
mat - the matrix 5577 5578 Level: advanced 5579 5580 Notes: 5581 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5582 must free the arrays once the matrix has been destroyed and not before. 5583 5584 The i and j indices are 0 based 5585 5586 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5587 5588 This sets local rows and cannot be used to set off-processor values. 5589 5590 You cannot later use MatSetValues() to change values in this matrix. 5591 5592 .keywords: matrix, aij, compressed row, sparse, parallel 5593 5594 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5595 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5596 @*/ 5597 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5598 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5599 { 5600 PetscErrorCode ierr; 5601 Mat_MPIAIJ *maij; 5602 5603 PetscFunctionBegin; 5604 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5605 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5606 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5607 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5608 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5609 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5610 maij = (Mat_MPIAIJ*) (*mat)->data; 5611 maij->donotstash = PETSC_TRUE; 5612 (*mat)->preallocated = PETSC_TRUE; 5613 5614 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5615 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5616 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5617 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5618 5619 ierr = 
MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5620 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5621 5622 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5623 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5624 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5625 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5626 5627 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5628 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5629 PetscFunctionReturn(0); 5630 } 5631 5632 /* 5633 Special version for direct calls from Fortran 5634 */ 5635 #include <private/fortranimpl.h> 5636 5637 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5638 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5639 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5640 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5641 #endif 5642 5643 /* Change these macros so can be used in void function */ 5644 #undef CHKERRQ 5645 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5646 #undef SETERRQ2 5647 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5648 #undef SETERRQ 5649 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5650 5651 EXTERN_C_BEGIN 5652 #undef __FUNCT__ 5653 #define __FUNCT__ "matsetvaluesmpiaij_" 5654 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5655 { 5656 Mat mat = *mmat; 5657 PetscInt m = *mm, n = *mn; 5658 InsertMode addv = *maddv; 5659 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5660 PetscScalar value; 5661 PetscErrorCode ierr; 5662 5663 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5664 if (mat->insertmode == NOT_SET_VALUES) { 5665 mat->insertmode = addv; 5666 } 5667 #if defined(PETSC_USE_DEBUG) 5668 else if (mat->insertmode != addv) { 5669 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5670 } 5671 #endif 5672 { 5673 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5674 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5675 PetscBool roworiented = aij->roworiented; 5676 5677 /* Some Variables required in the macro */ 5678 Mat A = aij->A; 5679 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5680 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5681 MatScalar *aa = a->a; 5682 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5683 Mat B = aij->B; 5684 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5685 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5686 MatScalar *ba = b->a; 5687 5688 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5689 PetscInt nonew = a->nonew; 5690 MatScalar *ap1,*ap2; 5691 5692 PetscFunctionBegin; 5693 for (i=0; i<m; i++) { 5694 if (im[i] < 0) continue; 5695 #if defined(PETSC_USE_DEBUG) 5696 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5697 #endif 5698 if (im[i] >= rstart && im[i] < rend) { 5699 row = im[i] - rstart; 5700 lastcol1 = -1; 5701 rp1 = aj + ai[row]; 5702 ap1 = aa + ai[row]; 5703 rmax1 = aimax[row]; 5704 nrow1 = ailen[row]; 5705 low1 = 0; 5706 high1 = nrow1; 5707 lastcol2 = -1; 5708 rp2 = bj + bi[row]; 5709 ap2 = ba + bi[row]; 5710 rmax2 = bimax[row]; 5711 nrow2 = bilen[row]; 5712 low2 = 0; 5713 high2 = nrow2; 5714 5715 for (j=0; j<n; j++) { 5716 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5717 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5718 if (in[j] >= cstart && in[j] < cend){ 5719 col = in[j] - cstart; 5720 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5721 } else if (in[j] < 0) continue; 5722 #if 
defined(PETSC_USE_DEBUG) 5723 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5724 #endif 5725 else { 5726 if (mat->was_assembled) { 5727 if (!aij->colmap) { 5728 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5729 } 5730 #if defined (PETSC_USE_CTABLE) 5731 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5732 col--; 5733 #else 5734 col = aij->colmap[in[j]] - 1; 5735 #endif 5736 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5737 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5738 col = in[j]; 5739 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5740 B = aij->B; 5741 b = (Mat_SeqAIJ*)B->data; 5742 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5743 rp2 = bj + bi[row]; 5744 ap2 = ba + bi[row]; 5745 rmax2 = bimax[row]; 5746 nrow2 = bilen[row]; 5747 low2 = 0; 5748 high2 = nrow2; 5749 bm = aij->B->rmap->n; 5750 ba = b->a; 5751 } 5752 } else col = in[j]; 5753 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5754 } 5755 } 5756 } else { 5757 if (!aij->donotstash) { 5758 if (roworiented) { 5759 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5760 } else { 5761 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5762 } 5763 } 5764 } 5765 }} 5766 PetscFunctionReturnVoid(); 5767 } 5768 EXTERN_C_END 5769 5770