1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 
PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. 
 Should be a routine for each matrix type.

    Only for square matrices

    Rank 0 owns the full SeqAIJ matrix gmat and ships each process its block of
    m rows (row lengths, then column indices, then values).  With
    MAT_REUSE_MATRIX only the numerical values are re-sent; the sparsity
    pattern cached in *inmat (plus the saved ld[] counts) is reused.
 */
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* gmat need only be a valid SeqAIJ matrix on rank 0; it is ignored elsewhere */
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* rowners[] becomes the prefix sum of the local row counts: the global row
       ownership ranges [rowners[rank], rowners[rank+1]) */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts; ld[i] counts entries
         of row i that lie strictly LEFT of the diagonal block (column < rstart),
         which the MAT_REUSE_MATRIX branch below depends on */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 owns rows [0,rowners[1]), so its own data starts at the front of gmata */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts (same scan as rank 0 above) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, so subtract
       the off-diagonal counts to get the diagonal-block counts */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths for the MatSetValues() loop */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash ld so a later MAT_REUSE_MATRIX call can split values into A/B parts */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr  = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat.
       The received row is laid out [left-of-diagonal | diagonal block | right-of-diagonal];
       ld[i] (saved at creation) gives the left-of-diagonal count for each row. */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* off-diagonal piece = right part of previous row plus left part of this row */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diagonal piece of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
 */
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* table maps global column + 1 -> local column + 1; the +1 shift keeps 0 free
     to mean "not present" */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of global length N; entry 0 means "column not present locally" */
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts/adds one value into the diagonal block (A part) of the local matrix.
   Binary-searches the sorted column indices rp1[low1..high1) of row 'row'
   (warm-started from the previous call via lastcol1), updates in place when the
   entry exists, otherwise reallocates if needed and shifts later entries up to
   make room.  Relies on many locals declared in MatSetValues_MPIAIJ().
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}


/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block
   (B part); col here is a LOCAL column index into B.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
       for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value; \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
      if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
      N = nrow2++ - 1; b->nz++; high2++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col;  \
      ap2[_i] = value; \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites the values of one locally owned row, given the full row of values v[]
   ordered by global column.  Splits v into the B entries left of the diagonal
   block, the A (diagonal block) entries, and the B entries right of the diagonal
   block.  Assumes the row's sparsity pattern is already set.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues() for MPIAIJ: locally owned rows go directly into the A
   (diagonal block) or B (off-diagonal block) storage via the insertion macros
   above; rows owned by other processes are stashed for communication during
   assembly (unless donotstash/nooffprocentries is set).
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew = a->nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;     /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the cursors used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            /* B uses compressed local column numbering; translate via colmap */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* brand-new off-diagonal column: fall back to global numbering for B */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];   /* before first assembly B is addressed by global column */
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   MatGetValues() for MPIAIJ.  Only locally owned rows may be queried; entries
   whose column is not stored in the off-diagonal block are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column to B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Starts assembly: checks that all ranks use the same InsertMode and begins
   scattering the stashed off-process values toward their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* BOR of the enum values: a mixed result equals ADD_VALUES|INSERT_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Finishes assembly: drains the stash into local storage via
   MatSetValues_MPIAIJ(), assembles the A and B blocks, handles global
   disassembly agreement, and (on first final assembly) builds the
   scatter used for MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: result is true only if everyone stayed assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zeroes all stored values in both the diagonal and off-diagonal blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the given global rows (optionally placing diag on the diagonal and
   fixing b = diag*x for those rows).  Each process may list rows owned by any
   process; the row indices are first routed to their owners with a
   nonblocking send/receive rendezvous, then the local rows are zeroed.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for rank j, nprocs[2*j+1] = 1 iff any row goes to j */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the owner search only when the row indices are not ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* the bucketing above advanced starts[]; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: the diagonal entries must be set one by one */
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm
                    comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;  /* off-diagonal block */
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = #indices destined for process j; nprocs[2*j+1] = 1 iff any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the owner search only when the input is not sorted ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* bucket the requested row indices by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] (the loop above advanced it) before issuing the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];  /* first global row owned by this process */

  /* wait on receives */
  ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert global to local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  /* Build a mask vector with 1 in every zeroed row, then scatter it to the
     ghost layout so each process can see which of its off-diagonal columns
     correspond to zeroed global rows. */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  /* bring the ghost values of x over so b can be corrected below */
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;  /* maps compressed row -> actual local row */
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column belongs to a zeroed global row: fold a(i,j)*x(j) into b, then zero it */
          bb[*ridx] -= *aa*xx[*aj];
          *aa       = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   MatMult_MPIAIJ - yy = A*xx. Overlaps the ghost-value scatter of xx with the
   local diagonal-block product, then adds the off-diagonal contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* start the ghost update, do the local product while it is in flight */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* add the off-diagonal block applied to the ghost values */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* MatMultDiagonalBlock_MPIAIJ - applies only the local (diagonal) block: xx = diag(A)*bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* MatMultAdd_MPIAIJ - zz = yy + A*xx, same overlap structure as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr =
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   MatMultTranspose_MPIAIJ - yy = A^T*xx. The off-diagonal block transpose is
   applied first into the ghost vector, then scattered back (reverse scatter)
   and accumulated into yy; the order of local work vs. scatter depends on
   whether the scatter's begin/end are merged.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals Amat^T within tol.
   Cheap local test first (diagonal blocks); only if that passes and more
   than one process is involved does it gather the off-diagonal blocks.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr =
MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  /* NOTE(review): loop bound uses M (rows) while the allocation is sized with
     N (cols) — only safe when M == N; verify for rectangular matrices. */
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* extract A(Me,Notme) and B(Notme,Me); they must be transposes of each other */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* MatMultTransposeAdd_MPIAIJ - zz = yy + A^T*xx, overlapping the reverse scatter
   of the off-diagonal contribution with the local transpose product. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
/* MatGetDiagonal_MPIAIJ - extracts the diagonal into v; delegates to the local
   diagonal block after checking the matrix is square with matching row/col layout. */
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* MatScale_MPIAIJ - A = aa*A; scales both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* MatDestroy_MPIAIJ - releases all resources held by the parallel AIJ matrix
   (blocks, scatter context, column map, work arrays) and detaches the
   composed functions registered at creation. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr =
PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* detach every function composed onto the object at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer.
   Rank 0 writes the header, then gathers row lengths, column indices and
   values from every other rank (flow-controlled) and streams them to the
   file in global row order.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;  /* local diagonal block */
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;  /* local off-diagonal block */
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;
  PetscInt          message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr =
MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;  /* nonzeros owned by this process */
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    /* header[3] = global nonzero count, reduced from all ranks */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  /* merge each row: off-diagonal columns left of the diagonal block, then the
     diagonal block (shifted to global numbering), then the remaining
     off-diagonal columns — yielding globally sorted column indices per row */
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): the check is on rnz but the message prints nz — likely should pass rnz */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  /* same merge order as the column indices above, so values line up */
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): as above — message prints nz, check is on rnz */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr =
MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for ASCII, draw,
   binary and socket viewers. Info-style ASCII formats are handled per-rank;
   otherwise the full matrix is either viewed locally (one process) or
   assembled onto process 0 and viewed there.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    /* temporary matrix: everything on rank 0, empty elsewhere */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    /* temporarily shift local column indices to global numbering ... */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* ... and restore them afterwards */
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr =
PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;  /* keep the base pointer; cols is advanced in the loop below */
    /* translate compressed off-diagonal column indices to global numbering */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* MatView_MPIAIJ - top-level viewer entry point; accepts ASCII, draw, binary
   and socket viewers and errors out on anything else. */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr =
MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   MatSOR_MPIAIJ - Applies local (block Jacobi) SOR/Eisenstat sweeps. Each
   outer iteration refreshes the ghost values of xx, forms bb1 = bb - B*x
   and runs the requested sweep of the sequential SOR on the diagonal block.
   Truly parallel (global) SOR is not supported.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;  /* work vector, allocated only when needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update since the initial guess is zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr =
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward local sweep from a zero initial guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily cache the matrix diagonal for the pointwise product below */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr =
VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1668 1669 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1670 1671 /* local sweep */ 1672 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1673 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1674 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1675 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1676 1677 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatPermute_MPIAIJ" 1683 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1684 { 1685 MPI_Comm comm,pcomm; 1686 PetscInt first,local_size,nrows; 1687 const PetscInt *rows; 1688 PetscMPIInt size; 1689 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1694 /* make a collective version of 'rowp' */ 1695 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1696 if (pcomm==comm) { 1697 crowp = rowp; 1698 } else { 1699 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1700 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1701 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 1702 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1703 } 1704 /* collect the global row permutation and invert it */ 1705 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1706 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1707 if (pcomm!=comm) { 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 } 1710 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = 
ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 1721 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 1722 if (size==1) { 1723 lcolp = colp; 1724 } else { 1725 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 1726 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 1727 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 1728 } 1729 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1730 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1731 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 1732 if (size>1) { 1733 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 1734 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1735 } 1736 /* now we just get the submatrix */ 1737 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1738 /* clean up */ 1739 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1740 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1746 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1747 { 1748 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1749 Mat A = mat->A,B = mat->B; 1750 PetscErrorCode ierr; 1751 PetscReal isend[5],irecv[5]; 1752 1753 PetscFunctionBegin; 1754 info->block_size = 1.0; 1755 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1759 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1760 
isend[3] += info->memory; isend[4] += info->mallocs;
  /* MAT_LOCAL reports this process's totals; the GLOBAL flavors reduce the
     five counters across the matrix's communicator */
  if (flag == MAT_LOCAL) {
    info->nz_used = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded = isend[2];
    info->memory = isend[3];
    info->mallocs = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  }
  info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - dispatch an option to the sequential blocks and/or
   record it in the parallel matrix's own state.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded to both the diagonal (A) and off-diagonal (B) blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally too: MatSetValues_MPIAIJ consults a->roworiented */
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd = flg;
    if (flg) {
      /* SPD implies (structural) symmetry */
      A->symmetric = PETSC_TRUE;
      A->structurally_symmetric = PETSC_TRUE;
      A->symmetric_set = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  /* symmetry-type options are only forwarded to the diagonal block */
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - return one locally owned row with global column
   indices, merging the diagonal-block (A) and off-diagonal-block (B)
   entries in increasing column order.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; cleared by MatRestoreRow */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request only the pieces (values and/or indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v) {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps B's compressed local columns to global columns */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the
         diagonal block; B entries split around the A entries */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v) *v = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - release the row obtained with MatGetRow_MPIAIJ
   (the work buffers are cached on the matrix, so only the flag is reset) */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Frobenius, one, and infinity norms of a parallel AIJ
   matrix; each process sums over its diagonal (A) and off-diagonal (B)
   blocks and the results are reduced over the communicator.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* uniprocessor: defer entirely to the sequential implementation */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column, then reduce and take the max;
         note tmp is sized by the GLOBAL number of columns */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* A's columns are local: shift by cstart to get the global column */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* B's columns are compressed: garray maps them to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local max then a global
         MAX reduction suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - form the transpose by inserting this process's rows
   as columns of the result with MatSetValues().

   NOTE: Aloc->j is temporarily shifted to GLOBAL column indices in place and
   shifted back afterwards; the statement order here is load-bearing.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: each local row i becomes column (rstart+i) of B */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part: expand compressed columns via garray first */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: fold B's innards back into A */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - mat = diag(ll) * mat * diag(rr).
   The right-scaling of the off-diagonal block needs the ghost values of rr,
   so that scatter is overlapped with the left scaling and diagonal-block work.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
/* MatSetBlockSize_MPIAIJ - propagate the block size to both sequential
   blocks and to the parallel row/column layouts */
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - clear the factored state (diagonal block only;
   the off-diagonal block is never factored) */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

PetscFunctionBegin; 2143 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatEqual_MPIAIJ" 2149 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2150 { 2151 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2152 Mat a,b,c,d; 2153 PetscBool flg; 2154 PetscErrorCode ierr; 2155 2156 PetscFunctionBegin; 2157 a = matA->A; b = matA->B; 2158 c = matB->A; d = matB->B; 2159 2160 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2161 if (flg) { 2162 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2163 } 2164 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatCopy_MPIAIJ" 2170 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2171 { 2172 PetscErrorCode ierr; 2173 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2174 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2175 2176 PetscFunctionBegin; 2177 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2178 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2179 /* because of the column compression in the off-processor part of the matrix a->B, 2180 the number of columns in a->B and b->B may be different, hence we cannot call 2181 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2182 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2183 then copying the submatrices */ 2184 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2185 } else { 2186 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2187 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2188 } 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 2194 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 2195 { 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatAXPY_MPIAIJ" 2205 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt i; 2209 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2210 PetscBLASInt bnz,one=1; 2211 Mat_SeqAIJ *x,*y; 2212 2213 PetscFunctionBegin; 2214 if (str == SAME_NONZERO_PATTERN) { 2215 PetscScalar alpha = a; 2216 x = (Mat_SeqAIJ *)xx->A->data; 2217 y = (Mat_SeqAIJ *)yy->A->data; 2218 bnz = PetscBLASIntCast(x->nz); 2219 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2220 x = (Mat_SeqAIJ *)xx->B->data; 2221 y = (Mat_SeqAIJ *)yy->B->data; 2222 bnz = PetscBLASIntCast(x->nz); 2223 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2224 } else if (str == SUBSET_NONZERO_PATTERN) { 2225 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2226 2227 x = (Mat_SeqAIJ *)xx->B->data; 2228 y = (Mat_SeqAIJ *)yy->B->data; 2229 if (y->xtoy && y->XtoY != xx->B) { 2230 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2231 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2232 } 2233 if (!y->xtoy) { /* get xtoy */ 2234 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2235 y->XtoY = xx->B; 2236 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2237 } 
2238 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2239 } else { 2240 Mat B; 2241 PetscInt *nnz_d,*nnz_o; 2242 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2243 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2244 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2245 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2246 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2247 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2248 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr); 2250 ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr); 2251 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2252 ierr = MatHeaderReplace(Y,B); 2253 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2254 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 2259 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatConjugate_MPIAIJ" 2263 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2264 { 2265 #if defined(PETSC_USE_COMPLEX) 2266 PetscErrorCode ierr; 2267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2268 2269 PetscFunctionBegin; 2270 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2271 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2272 #else 2273 PetscFunctionBegin; 2274 #endif 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatRealPart_MPIAIJ" 2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2287 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2293 
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #ifdef PETSC_HAVE_PBGL 2305 2306 #include <boost/parallel/mpi/bsp_process_group.hpp> 2307 #include <boost/graph/distributed/ilu_default_graph.hpp> 2308 #include <boost/graph/distributed/ilu_0_block.hpp> 2309 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2310 #include <boost/graph/distributed/petsc/interface.hpp> 2311 #include <boost/multi_array.hpp> 2312 #include <boost/parallel/distributed_property_map->hpp> 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2316 /* 2317 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2318 */ 2319 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2320 { 2321 namespace petsc = boost::distributed::petsc; 2322 2323 namespace graph_dist = boost::graph::distributed; 2324 using boost::graph::distributed::ilu_default::process_group_type; 2325 using boost::graph::ilu_permuted; 2326 2327 PetscBool row_identity, col_identity; 2328 PetscContainer c; 2329 PetscInt m, n, M, N; 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2334 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2335 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2336 if (!row_identity || !col_identity) { 2337 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2338 } 2339 2340 process_group_type pg; 2341 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2342 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2343 lgraph_type& level_graph = *lgraph_p; 2344 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2345 2346 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2347 ilu_permuted(level_graph); 2348 2349 /* put together the new matrix */ 2350 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2351 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2352 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2353 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2354 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2355 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2356 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2357 2358 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2359 ierr = PetscContainerSetPointer(c, lgraph_p); 2360 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2361 ierr = PetscContainerDestroy(&c); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2367 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2368 { 2369 PetscFunctionBegin; 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatSolve_MPIAIJ" 2375 /* 2376 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2377 */ 2378 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2379 { 2380 namespace graph_dist = boost::graph::distributed; 2381 2382 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2383 lgraph_type* lgraph_p; 2384 PetscContainer c; 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2389 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2390 ierr = VecCopy(b, x);CHKERRQ(ierr); 2391 2392 PetscScalar* array_x; 2393 ierr = VecGetArray(x, 
&array_x);CHKERRQ(ierr); 2394 PetscInt sx; 2395 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2396 2397 PetscScalar* array_b; 2398 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2399 PetscInt sb; 2400 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2401 2402 lgraph_type& level_graph = *lgraph_p; 2403 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2404 2405 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2406 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2407 ref_x(array_x, boost::extents[num_vertices(graph)]); 2408 2409 typedef boost::iterator_property_map<array_ref_type::iterator, 2410 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2411 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2412 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2413 2414 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2415 2416 PetscFunctionReturn(0); 2417 } 2418 #endif 2419 2420 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2421 PetscInt nzlocal,nsends,nrecvs; 2422 PetscMPIInt *send_rank,*recv_rank; 2423 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2424 PetscScalar *sbuf_a,**rbuf_a; 2425 PetscErrorCode (*Destroy)(Mat); 2426 } Mat_Redundant; 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2430 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2431 { 2432 PetscErrorCode ierr; 2433 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2434 PetscInt i; 2435 2436 PetscFunctionBegin; 2437 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2438 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2439 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2440 for (i=0; i<redund->nrecvs; i++){ 2441 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2443 } 2444 ierr = 
PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2445 ierr = PetscFree(redund);CHKERRQ(ierr); 2446 PetscFunctionReturn(0); 2447 } 2448 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatDestroy_MatRedundant" 2451 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2452 { 2453 PetscErrorCode ierr; 2454 PetscContainer container; 2455 Mat_Redundant *redund=PETSC_NULL; 2456 2457 PetscFunctionBegin; 2458 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2459 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2460 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2461 A->ops->destroy = redund->Destroy; 2462 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2463 if (A->ops->destroy) { 2464 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2465 } 2466 PetscFunctionReturn(0); 2467 } 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2471 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2472 { 2473 PetscMPIInt rank,size; 2474 MPI_Comm comm=((PetscObject)mat)->comm; 2475 PetscErrorCode ierr; 2476 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2477 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2478 PetscInt *rowrange=mat->rmap->range; 2479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2480 Mat A=aij->A,B=aij->B,C=*matredundant; 2481 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2482 PetscScalar *sbuf_a; 2483 PetscInt nzlocal=a->nz+b->nz; 2484 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2485 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2486 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2487 MatScalar *aworkA,*aworkB; 2488 PetscScalar *vals; 2489 PetscMPIInt tag1,tag2,tag3,imdex; 2490 MPI_Request 
*s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2491 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2492 MPI_Status recv_status,*send_status; 2493 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2494 PetscInt **rbuf_j=PETSC_NULL; 2495 PetscScalar **rbuf_a=PETSC_NULL; 2496 Mat_Redundant *redund=PETSC_NULL; 2497 PetscContainer container; 2498 2499 PetscFunctionBegin; 2500 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2501 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2502 2503 if (reuse == MAT_REUSE_MATRIX) { 2504 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2505 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2506 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2507 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2508 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2509 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2510 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2511 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2512 2513 nsends = redund->nsends; 2514 nrecvs = redund->nrecvs; 2515 send_rank = redund->send_rank; 2516 recv_rank = redund->recv_rank; 2517 sbuf_nz = redund->sbuf_nz; 2518 rbuf_nz = redund->rbuf_nz; 2519 sbuf_j = redund->sbuf_j; 2520 sbuf_a = redund->sbuf_a; 2521 rbuf_j = redund->rbuf_j; 2522 rbuf_a = redund->rbuf_a; 2523 } 2524 2525 if (reuse == MAT_INITIAL_MATRIX){ 2526 PetscMPIInt subrank,subsize; 2527 PetscInt nleftover,np_subcomm; 2528 /* get the destination processors' id send_rank, nsends and nrecvs */ 2529 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2530 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2531 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2532 np_subcomm = size/nsubcomm; 2533 nleftover = size - nsubcomm*np_subcomm; 2534 nsends = 0; nrecvs = 0; 2535 for (i=0; i<size; i++){ /* i=rank*/ 2536 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2537 send_rank[nsends] = i; nsends++; 2538 recv_rank[nrecvs++] = i; 2539 } 2540 } 2541 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2542 i = size-nleftover-1; 2543 j = 0; 2544 while (j < nsubcomm - nleftover){ 2545 send_rank[nsends++] = i; 2546 i--; j++; 2547 } 2548 } 2549 2550 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2551 for (i=0; i<nleftover; i++){ 2552 recv_rank[nrecvs++] = size-nleftover+i; 2553 } 2554 } 2555 2556 /* allocate sbuf_j, sbuf_a */ 2557 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2558 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2559 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2560 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2561 2562 /* copy mat's local entries into the buffers */ 2563 if (reuse == MAT_INITIAL_MATRIX){ 2564 rownz_max = 0; 2565 rptr = sbuf_j; 2566 cols = sbuf_j + rend-rstart + 1; 2567 vals = sbuf_a; 2568 rptr[0] = 0; 2569 for (i=0; 
i<rend-rstart; i++){ 2570 row = i + rstart; 2571 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2572 ncols = nzA + nzB; 2573 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2574 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2575 /* load the column indices for this row into cols */ 2576 lwrite = 0; 2577 for (l=0; l<nzB; l++) { 2578 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2579 vals[lwrite] = aworkB[l]; 2580 cols[lwrite++] = ctmp; 2581 } 2582 } 2583 for (l=0; l<nzA; l++){ 2584 vals[lwrite] = aworkA[l]; 2585 cols[lwrite++] = cstart + cworkA[l]; 2586 } 2587 for (l=0; l<nzB; l++) { 2588 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2589 vals[lwrite] = aworkB[l]; 2590 cols[lwrite++] = ctmp; 2591 } 2592 } 2593 vals += ncols; 2594 cols += ncols; 2595 rptr[i+1] = rptr[i] + ncols; 2596 if (rownz_max < ncols) rownz_max = ncols; 2597 } 2598 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2599 } else { /* only copy matrix values into sbuf_a */ 2600 rptr = sbuf_j; 2601 vals = sbuf_a; 2602 rptr[0] = 0; 2603 for (i=0; i<rend-rstart; i++){ 2604 row = i + rstart; 2605 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2606 ncols = nzA + nzB; 2607 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2608 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2609 lwrite = 0; 2610 for (l=0; l<nzB; l++) { 2611 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2612 } 2613 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2616 } 2617 vals += ncols; 2618 rptr[i+1] = rptr[i] + ncols; 2619 } 2620 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2621 2622 /* send nzlocal to others, and recv other's nzlocal */ 2623 /*--------------------------------------------------*/ 2624 if (reuse == MAT_INITIAL_MATRIX){ 2625 ierr = PetscMalloc2(3*(nsends + 
nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2626 s_waits2 = s_waits3 + nsends; 2627 s_waits1 = s_waits2 + nsends; 2628 r_waits1 = s_waits1 + nsends; 2629 r_waits2 = r_waits1 + nrecvs; 2630 r_waits3 = r_waits2 + nrecvs; 2631 } else { 2632 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2633 r_waits3 = s_waits3 + nsends; 2634 } 2635 2636 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2637 if (reuse == MAT_INITIAL_MATRIX){ 2638 /* get new tags to keep the communication clean */ 2639 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2641 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2642 2643 /* post receives of other's nzlocal */ 2644 for (i=0; i<nrecvs; i++){ 2645 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2646 } 2647 /* send nzlocal to others */ 2648 for (i=0; i<nsends; i++){ 2649 sbuf_nz[i] = nzlocal; 2650 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2651 } 2652 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2653 count = nrecvs; 2654 while (count) { 2655 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2656 recv_rank[imdex] = recv_status.MPI_SOURCE; 2657 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2658 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2659 2660 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2661 rbuf_nz[imdex] += i + 2; 2662 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2663 ierr = 
MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2664 count--; 2665 } 2666 /* wait on sends of nzlocal */ 2667 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2668 /* send mat->i,j to others, and recv from other's */ 2669 /*------------------------------------------------*/ 2670 for (i=0; i<nsends; i++){ 2671 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2672 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2673 } 2674 /* wait on receives of mat->i,j */ 2675 /*------------------------------*/ 2676 count = nrecvs; 2677 while (count) { 2678 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2679 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2680 count--; 2681 } 2682 /* wait on sends of mat->i,j */ 2683 /*---------------------------*/ 2684 if (nsends) { 2685 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2686 } 2687 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2688 2689 /* post receives, send and receive mat->a */ 2690 /*----------------------------------------*/ 2691 for (imdex=0; imdex<nrecvs; imdex++) { 2692 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2693 } 2694 for (i=0; i<nsends; i++){ 2695 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2696 } 2697 count = nrecvs; 2698 while (count) { 2699 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2700 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2701 count--; 2702 } 2703 if (nsends) { 2704 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2705 } 2706 2707 ierr = 
PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2708 2709 /* create redundant matrix */ 2710 /*-------------------------*/ 2711 if (reuse == MAT_INITIAL_MATRIX){ 2712 /* compute rownz_max for preallocation */ 2713 for (imdex=0; imdex<nrecvs; imdex++){ 2714 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2715 rptr = rbuf_j[imdex]; 2716 for (i=0; i<j; i++){ 2717 ncols = rptr[i+1] - rptr[i]; 2718 if (rownz_max < ncols) rownz_max = ncols; 2719 } 2720 } 2721 2722 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2723 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2724 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2725 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2726 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2727 } else { 2728 C = *matredundant; 2729 } 2730 2731 /* insert local matrix entries */ 2732 rptr = sbuf_j; 2733 cols = sbuf_j + rend-rstart + 1; 2734 vals = sbuf_a; 2735 for (i=0; i<rend-rstart; i++){ 2736 row = i + rstart; 2737 ncols = rptr[i+1] - rptr[i]; 2738 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2739 vals += ncols; 2740 cols += ncols; 2741 } 2742 /* insert received matrix entries */ 2743 for (imdex=0; imdex<nrecvs; imdex++){ 2744 rstart = rowrange[recv_rank[imdex]]; 2745 rend = rowrange[recv_rank[imdex]+1]; 2746 rptr = rbuf_j[imdex]; 2747 cols = rbuf_j[imdex] + rend-rstart + 1; 2748 vals = rbuf_a[imdex]; 2749 for (i=0; i<rend-rstart; i++){ 2750 row = i + rstart; 2751 ncols = rptr[i+1] - rptr[i]; 2752 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2753 vals += ncols; 2754 cols += ncols; 2755 } 2756 } 2757 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2760 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant 
mat size %d != input mat size %d",M,mat->rmap->N); 2761 if (reuse == MAT_INITIAL_MATRIX) { 2762 PetscContainer container; 2763 *matredundant = C; 2764 /* create a supporting struct and attach it to C for reuse */ 2765 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2766 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2767 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2768 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2769 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2770 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2771 2772 redund->nzlocal = nzlocal; 2773 redund->nsends = nsends; 2774 redund->nrecvs = nrecvs; 2775 redund->send_rank = send_rank; 2776 redund->recv_rank = recv_rank; 2777 redund->sbuf_nz = sbuf_nz; 2778 redund->rbuf_nz = rbuf_nz; 2779 redund->sbuf_j = sbuf_j; 2780 redund->sbuf_a = sbuf_a; 2781 redund->rbuf_j = rbuf_j; 2782 redund->rbuf_a = rbuf_a; 2783 2784 redund->Destroy = C->ops->destroy; 2785 C->ops->destroy = MatDestroy_MatRedundant; 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2793 { 2794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2795 PetscErrorCode ierr; 2796 PetscInt i,*idxb = 0; 2797 PetscScalar *va,*vb; 2798 Vec vtmp; 2799 2800 PetscFunctionBegin; 2801 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2802 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2803 if (idx) { 2804 for (i=0; i<A->rmap->n; i++) { 2805 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2806 } 2807 } 2808 2809 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2810 if (idx) { 2811 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2812 } 2813 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2814 ierr = 
VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2815 2816 for (i=0; i<A->rmap->n; i++){ 2817 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2818 va[i] = vb[i]; 2819 if (idx) idx[i] = a->garray[idxb[i]]; 2820 } 2821 } 2822 2823 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2824 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2825 ierr = PetscFree(idxb);CHKERRQ(ierr); 2826 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2833 { 2834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2835 PetscErrorCode ierr; 2836 PetscInt i,*idxb = 0; 2837 PetscScalar *va,*vb; 2838 Vec vtmp; 2839 2840 PetscFunctionBegin; 2841 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2842 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2843 if (idx) { 2844 for (i=0; i<A->cmap->n; i++) { 2845 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2846 } 2847 } 2848 2849 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2850 if (idx) { 2851 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2852 } 2853 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2854 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2855 2856 for (i=0; i<A->rmap->n; i++){ 2857 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2858 va[i] = vb[i]; 2859 if (idx) idx[i] = a->garray[idxb[i]]; 2860 } 2861 } 2862 2863 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2864 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2865 ierr = PetscFree(idxb);CHKERRQ(ierr); 2866 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2867 PetscFunctionReturn(0); 2868 } 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2873 { 2874 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2875 PetscInt n = A->rmap->n; 2876 PetscInt cstart = A->cmap->rstart; 2877 PetscInt *cmap = mat->garray; 2878 PetscInt 
*diagIdx, *offdiagIdx; 2879 Vec diagV, offdiagV; 2880 PetscScalar *a, *diagA, *offdiagA; 2881 PetscInt r; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2886 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2887 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2888 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2889 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2890 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2891 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2892 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2893 for(r = 0; r < n; ++r) { 2894 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2895 a[r] = diagA[r]; 2896 idx[r] = cstart + diagIdx[r]; 2897 } else { 2898 a[r] = offdiagA[r]; 2899 idx[r] = cmap[offdiagIdx[r]]; 2900 } 2901 } 2902 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2903 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2905 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2906 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2907 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2908 PetscFunctionReturn(0); 2909 } 2910 2911 #undef __FUNCT__ 2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2914 { 2915 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2916 PetscInt n = A->rmap->n; 2917 PetscInt cstart = A->cmap->rstart; 2918 PetscInt *cmap = mat->garray; 2919 PetscInt *diagIdx, *offdiagIdx; 2920 Vec diagV, offdiagV; 2921 PetscScalar *a, *diagA, *offdiagA; 2922 PetscInt r; 2923 PetscErrorCode ierr; 2924 2925 PetscFunctionBegin; 2926 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2927 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2928 ierr = VecCreateSeq(((PetscObject)A)->comm, n, 
&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  /* keep whichever candidate has the larger magnitude; off-diagonal local
     indices are translated to global columns through cmap (= garray) */
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
/* Gathers the nonzero structure of the whole parallel matrix into one
   sequential matrix; numerical values are not copied. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
/* Inversion of the point-block diagonal is purely local: delegate to the
   diagonal block a->A. */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; entries are POSITIONAL (the numeric markers
   give the slot index in struct _MatOps) -- do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#ifdef PETSC_HAVE_PBGL
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetBlockSize_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
                                       MatSetValuesAdic_MPIAIJ,
#else
                                       0,
#endif
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_Basic,
                                       MatPtAPSymbolic_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                       MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       MatConjugate_MPIAIJ,
                                       0,
                               /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                               /*109*/ 0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                               /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                               /*124*/ MatFindNonZeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                               /*129*/ 0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Saves the numerical values of both sequential blocks so they can later be
   restored with MatRetrieveValues(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously saved by MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/* Type-specific implementation behind MatMPIAIJSetPreallocation(): creates the
   two sequential blocks (diagonal b->A and off-diagonal b->B) on first call and
   forwards the preallocation information to them. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

PetscFunctionBegin;
  /* default preallocation: 5 nonzeros/row in the diagonal block,
     2 nonzeros/row in the off-diagonal block */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* validate the optional per-row counts before using them */
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices: b->A holds the square diagonal
       block (local rows x local columns); b->B holds the off-diagonal part,
       stored with the full global column width. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/* Creates a new MPIAIJ matrix with the same layout as matin, copying values
   according to cpvalues; copies colmap/garray and duplicates both blocks. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr =
PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3244 3245 if (oldmat->colmap) { 3246 #if defined (PETSC_USE_CTABLE) 3247 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3248 #else 3249 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3250 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3251 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3252 #endif 3253 } else a->colmap = 0; 3254 if (oldmat->garray) { 3255 PetscInt len; 3256 len = oldmat->B->cmap->n; 3257 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3258 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3259 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3260 } else a->garray = 0; 3261 3262 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3263 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3264 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3265 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3266 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3268 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3269 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3270 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3271 *newmat = mat; 3272 PetscFunctionReturn(0); 3273 } 3274 3275 3276 3277 #undef __FUNCT__ 3278 #define __FUNCT__ "MatLoad_MPIAIJ" 3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3280 { 3281 PetscScalar *vals,*svals; 3282 MPI_Comm comm = ((PetscObject)viewer)->comm; 3283 PetscErrorCode ierr; 3284 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3285 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3286 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3287 PetscInt *ourlens = 
PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3288 PetscInt cend,cstart,n,*rowners,sizesset=1; 3289 int fd; 3290 3291 PetscFunctionBegin; 3292 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3293 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3294 if (!rank) { 3295 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3296 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3297 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3298 } 3299 3300 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3301 3302 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3303 M = header[1]; N = header[2]; 3304 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3305 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3306 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3307 3308 /* If global sizes are set, check if they are consistent with that given in the file */ 3309 if (sizesset) { 3310 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3311 } 3312 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3313 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3314 3315 /* determine ownership of all rows */ 3316 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3317 else m = newMat->rmap->n; /* Set by user */ 3318 3319 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3320 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3321 3322 /* First process needs enough room for process with most rows */ 3323 if (!rank) { 3324 mmax = 
rowners[1]; 3325 for (i=2; i<size; i++) { 3326 mmax = PetscMax(mmax,rowners[i]); 3327 } 3328 } else mmax = m; 3329 3330 rowners[0] = 0; 3331 for (i=2; i<=size; i++) { 3332 rowners[i] += rowners[i-1]; 3333 } 3334 rstart = rowners[rank]; 3335 rend = rowners[rank+1]; 3336 3337 /* distribute row lengths to all processors */ 3338 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3339 if (!rank) { 3340 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3341 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3342 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3343 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3344 for (j=0; j<m; j++) { 3345 procsnz[0] += ourlens[j]; 3346 } 3347 for (i=1; i<size; i++) { 3348 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3349 /* calculate the number of nonzeros on each processor */ 3350 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3351 procsnz[i] += rowlengths[j]; 3352 } 3353 ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3354 } 3355 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3356 } else { 3357 ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3358 } 3359 3360 if (!rank) { 3361 /* determine max buffer needed and allocate it */ 3362 maxnz = 0; 3363 for (i=0; i<size; i++) { 3364 maxnz = PetscMax(maxnz,procsnz[i]); 3365 } 3366 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3367 3368 /* read in my part of the matrix column indices */ 3369 nz = procsnz[0]; 3370 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3371 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3372 3373 /* read in every one elses and ship off */ 3374 for (i=1; i<size; i++) { 3375 nz = procsnz[i]; 3376 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3377 ierr = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3378 } 3379 
ierr = PetscFree(cols);CHKERRQ(ierr); 3380 } else { 3381 /* determine buffer space needed for message */ 3382 nz = 0; 3383 for (i=0; i<m; i++) { 3384 nz += ourlens[i]; 3385 } 3386 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3387 3388 /* receive message of column indices*/ 3389 ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3390 } 3391 3392 /* determine column ownership if matrix is not square */ 3393 if (N != M) { 3394 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3395 else n = newMat->cmap->n; 3396 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3397 cstart = cend - n; 3398 } else { 3399 cstart = rstart; 3400 cend = rend; 3401 n = cend - cstart; 3402 } 3403 3404 /* loop over local rows, determining number of off diagonal entries */ 3405 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3406 jj = 0; 3407 for (i=0; i<m; i++) { 3408 for (j=0; j<ourlens[i]; j++) { 3409 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3410 jj++; 3411 } 3412 } 3413 3414 for (i=0; i<m; i++) { 3415 ourlens[i] -= offlens[i]; 3416 } 3417 if (!sizesset) { 3418 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3419 } 3420 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3421 3422 for (i=0; i<m; i++) { 3423 ourlens[i] += offlens[i]; 3424 } 3425 3426 if (!rank) { 3427 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3428 3429 /* read in my part of the matrix numerical values */ 3430 nz = procsnz[0]; 3431 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3432 3433 /* insert into matrix */ 3434 jj = rstart; 3435 smycols = mycols; 3436 svals = vals; 3437 for (i=0; i<m; i++) { 3438 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3439 smycols += ourlens[i]; 3440 svals += ourlens[i]; 3441 jj++; 3442 } 3443 3444 /* read in other processors and ship out */ 3445 for (i=1; i<size; i++) { 3446 nz = 
procsnz[i]; 3447 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3448 ierr = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3449 } 3450 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3451 } else { 3452 /* receive numeric values */ 3453 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3454 3455 /* receive message of values*/ 3456 ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3457 3458 /* insert into matrix */ 3459 jj = rstart; 3460 smycols = mycols; 3461 svals = vals; 3462 for (i=0; i<m; i++) { 3463 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3464 smycols += ourlens[i]; 3465 svals += ourlens[i]; 3466 jj++; 3467 } 3468 } 3469 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3470 ierr = PetscFree(vals);CHKERRQ(ierr); 3471 ierr = PetscFree(mycols);CHKERRQ(ierr); 3472 ierr = PetscFree(rowners);CHKERRQ(ierr); 3473 3474 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3475 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3476 PetscFunctionReturn(0); 3477 } 3478 3479 #undef __FUNCT__ 3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3482 { 3483 PetscErrorCode ierr; 3484 IS iscol_local; 3485 PetscInt csize; 3486 3487 PetscFunctionBegin; 3488 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3489 if (call == MAT_REUSE_MATRIX) { 3490 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3491 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3492 } else { 3493 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3494 } 3495 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3496 if (call == MAT_INITIAL_MATRIX) { 3497 ierr = 
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3498 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3499 } 3500 PetscFunctionReturn(0); 3501 } 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3505 /* 3506 Not great since it makes two copies of the submatrix, first an SeqAIJ 3507 in local and then by concatenating the local matrices the end result. 3508 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3509 3510 Note: This requires a sequential iscol with all indices. 3511 */ 3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3513 { 3514 PetscErrorCode ierr; 3515 PetscMPIInt rank,size; 3516 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 3517 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3518 Mat *local,M,Mreuse; 3519 MatScalar *vwork,*aa; 3520 MPI_Comm comm = ((PetscObject)mat)->comm; 3521 Mat_SeqAIJ *aij; 3522 3523 3524 PetscFunctionBegin; 3525 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3527 3528 if (call == MAT_REUSE_MATRIX) { 3529 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3530 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3531 local = &Mreuse; 3532 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3533 } else { 3534 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3535 Mreuse = *local; 3536 ierr = PetscFree(local);CHKERRQ(ierr); 3537 } 3538 3539 /* 3540 m - number of local rows 3541 n - number of columns (same on all processors) 3542 rstart - first row in new global matrix generated 3543 */ 3544 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3545 if (call == MAT_INITIAL_MATRIX) { 3546 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3547 ii = 
aij->i; 3548 jj = aij->j; 3549 3550 /* 3551 Determine the number of non-zeros in the diagonal and off-diagonal 3552 portions of the matrix in order to do correct preallocation 3553 */ 3554 3555 /* first get start and end of "diagonal" columns */ 3556 if (csize == PETSC_DECIDE) { 3557 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3558 if (mglobal == n) { /* square matrix */ 3559 nlocal = m; 3560 } else { 3561 nlocal = n/size + ((n % size) > rank); 3562 } 3563 } else { 3564 nlocal = csize; 3565 } 3566 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3567 rstart = rend - nlocal; 3568 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3569 3570 /* next, compute all the lengths */ 3571 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3572 olens = dlens + m; 3573 for (i=0; i<m; i++) { 3574 jend = ii[i+1] - ii[i]; 3575 olen = 0; 3576 dlen = 0; 3577 for (j=0; j<jend; j++) { 3578 if (*jj < rstart || *jj >= rend) olen++; 3579 else dlen++; 3580 jj++; 3581 } 3582 olens[i] = olen; 3583 dlens[i] = dlen; 3584 } 3585 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3586 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3587 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3588 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3589 ierr = PetscFree(dlens);CHKERRQ(ierr); 3590 } else { 3591 PetscInt ml,nl; 3592 3593 M = *newmat; 3594 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3595 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3596 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3597 /* 3598 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3599 rather than the slower MatSetValues(). 
3600 */ 3601 M->was_assembled = PETSC_TRUE; 3602 M->assembled = PETSC_FALSE; 3603 } 3604 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3605 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3606 ii = aij->i; 3607 jj = aij->j; 3608 aa = aij->a; 3609 for (i=0; i<m; i++) { 3610 row = rstart + i; 3611 nz = ii[i+1] - ii[i]; 3612 cwork = jj; jj += nz; 3613 vwork = aa; aa += nz; 3614 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3615 } 3616 3617 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3618 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3619 *newmat = M; 3620 3621 /* save submatrix used in processor for next request */ 3622 if (call == MAT_INITIAL_MATRIX) { 3623 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3624 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3625 } 3626 3627 PetscFunctionReturn(0); 3628 } 3629 3630 EXTERN_C_BEGIN 3631 #undef __FUNCT__ 3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3633 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3634 { 3635 PetscInt m,cstart, cend,j,nnz,i,d; 3636 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3637 const PetscInt *JJ; 3638 PetscScalar *values; 3639 PetscErrorCode ierr; 3640 3641 PetscFunctionBegin; 3642 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3643 3644 ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr); 3645 ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr); 3646 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3647 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3648 m = B->rmap->n; 3649 cstart = B->cmap->rstart; 3650 cend = B->cmap->rend; 3651 rstart = B->rmap->rstart; 3652 3653 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3654 3655 #if defined(PETSC_USE_DEBUGGING) 3656 for (i=0; i<m; i++) { 3657 nnz = Ii[i+1]- Ii[i]; 3658 JJ = J + Ii[i]; 
3659 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3660 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3661 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3662 } 3663 #endif 3664 3665 for (i=0; i<m; i++) { 3666 nnz = Ii[i+1]- Ii[i]; 3667 JJ = J + Ii[i]; 3668 nnz_max = PetscMax(nnz_max,nnz); 3669 d = 0; 3670 for (j=0; j<nnz; j++) { 3671 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3672 } 3673 d_nnz[i] = d; 3674 o_nnz[i] = nnz - d; 3675 } 3676 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3677 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3678 3679 if (v) values = (PetscScalar*)v; 3680 else { 3681 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3682 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3683 } 3684 3685 for (i=0; i<m; i++) { 3686 ii = i + rstart; 3687 nnz = Ii[i+1]- Ii[i]; 3688 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3689 } 3690 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3691 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 3693 if (!v) { 3694 ierr = PetscFree(values);CHKERRQ(ierr); 3695 } 3696 PetscFunctionReturn(0); 3697 } 3698 EXTERN_C_END 3699 3700 #undef __FUNCT__ 3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3702 /*@ 3703 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3704 (the default parallel PETSc format). 3705 3706 Collective on MPI_Comm 3707 3708 Input Parameters: 3709 + B - the matrix 3710 . i - the indices into j for the start of each local row (starts with zero) 3711 . 
j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        v =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}  [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        v =  {4,5,6}  [size = local nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatMPIAIJSetPreallocationCSR_MPIAIJ() when B is MPIAIJ; silently a no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
For good matrix assembly performance 3761 the user should preallocate the matrix storage by setting the parameters 3762 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3763 performance can be increased by more than a factor of 50. 3764 3765 Collective on MPI_Comm 3766 3767 Input Parameters: 3768 + A - the matrix 3769 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3770 (same value is used for all local rows) 3771 . d_nnz - array containing the number of nonzeros in the various rows of the 3772 DIAGONAL portion of the local submatrix (possibly different for each row) 3773 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3774 The size of this array is equal to the number of local rows, i.e 'm'. 3775 You must leave room for the diagonal entry even if it is zero. 3776 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3777 submatrix (same value is used for all local rows). 3778 - o_nnz - array containing the number of nonzeros in the various rows of the 3779 OFF-DIAGONAL portion of the local submatrix (possibly different for 3780 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3781 structure. The size of this array is equal to the number 3782 of local rows, i.e 'm'. 3783 3784 If the *_nnz parameter is given then the *_nz parameter is ignored 3785 3786 The AIJ format (also called the Yale sparse matrix format or 3787 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3788 storage. The stored row and column indices begin with zero. 3789 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3790 3791 The parallel matrix is partitioned such that the first m0 rows belong to 3792 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3793 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3794 3795 The DIAGONAL portion of the local submatrix of a processor can be defined 3796 as the submatrix which is obtained by extraction the part corresponding to 3797 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3798 first row that belongs to the processor, r2 is the last row belonging to 3799 the this processor, and c1-c2 is range of indices of the local part of a 3800 vector suitable for applying the matrix to. This is an mxn matrix. In the 3801 common case of a square matrix, the row and column ranges are the same and 3802 the DIAGONAL part is also square. The remaining portion of the local 3803 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3804 3805 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3806 3807 You can call MatGetInfo() to get information on how effective the preallocation was; 3808 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3809 You can also run with the option -info and look for messages with the string 3810 malloc in them to see if additional memory allocation was needed. 3811 3812 Example usage: 3813 3814 Consider the following 8x8 matrix with 34 non-zero values, that is 3815 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3816 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3817 as follows: 3818 3819 .vb 3820 1 2 0 | 0 3 0 | 0 4 3821 Proc0 0 5 6 | 7 0 0 | 8 0 3822 9 0 10 | 11 0 0 | 12 0 3823 ------------------------------------- 3824 13 0 14 | 15 16 17 | 0 0 3825 Proc1 0 18 0 | 19 20 21 | 0 0 3826 0 0 0 | 22 23 0 | 24 0 3827 ------------------------------------- 3828 Proc2 25 26 27 | 0 0 28 | 29 0 3829 30 0 0 | 31 32 33 | 0 34 3830 .ve 3831 3832 This can be represented as a collection of submatrices as: 3833 3834 .vb 3835 A B C 3836 D E F 3837 G H I 3838 .ve 3839 3840 Where the submatrices A,B,C are owned by proc0, D,E,F are 3841 owned by proc1, G,H,I are owned by proc2. 
The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
    The 'M','N' parameters are 8,8, and have the same values on all procs.

    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
    matrix, and [DF] as another SeqAIJ matrix.

    When d_nz, o_nz parameters are specified, d_nz storage elements are
    allocated for every row of the local diagonal submatrix, and o_nz
    storage locations are allocated for every row of the OFF-DIAGONAL submat.
    One way to choose d_nz and o_nz is to use the max nonzeros per local
    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
    In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
    for proc2. i.e we are using 12+15+10=37 storage locations to store
    34 values.

    When d_nnz, o_nnz parameters are specified, the storage is specified
    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
    In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
    Here the space allocated is the sum of all the above values i.e 34, and
    hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatches to the MPIAIJ implementation when B is MPIAIJ; silently a no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        v =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}  [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        v =  {4,5,6}  [size = local nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* preallocation and value insertion are both handled by the CSR preallocation routine */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 3978 performance can be increased by more than a factor of 50. 3979 3980 Collective on MPI_Comm 3981 3982 Input Parameters: 3983 + comm - MPI communicator 3984 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3985 This value should be the same as the local size used in creating the 3986 y vector for the matrix-vector product y = Ax. 3987 . n - This value should be the same as the local size used in creating the 3988 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3989 calculated if N is given) For square matrices n is almost always m. 3990 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3991 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3992 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3993 (same value is used for all local rows) 3994 . d_nnz - array containing the number of nonzeros in the various rows of the 3995 DIAGONAL portion of the local submatrix (possibly different for each row) 3996 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3997 The size of this array is equal to the number of local rows, i.e 'm'. 3998 You must leave room for the diagonal entry even if it is zero. 3999 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4000 submatrix (same value is used for all local rows). 4001 - o_nnz - array containing the number of nonzeros in the various rows of the 4002 OFF-DIAGONAL portion of the local submatrix (possibly different for 4003 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4004 structure. The size of this array is equal to the number 4005 of local rows, i.e 'm'. 4006 4007 Output Parameter: 4008 . A - the matrix 4009 4010 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4011 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
4012 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4013 4014 Notes: 4015 If the *_nnz parameter is given then the *_nz parameter is ignored 4016 4017 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4018 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4019 storage requirements for this matrix. 4020 4021 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4022 processor than it must be used on all processors that share the object for 4023 that argument. 4024 4025 The user MUST specify either the local or global matrix dimensions 4026 (possibly both). 4027 4028 The parallel matrix is partitioned across processors such that the 4029 first m0 rows belong to process 0, the next m1 rows belong to 4030 process 1, the next m2 rows belong to process 2 etc.. where 4031 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4032 values corresponding to [m x N] submatrix. 4033 4034 The columns are logically partitioned with the n0 columns belonging 4035 to 0th partition, the next n1 columns belonging to the next 4036 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4037 4038 The DIAGONAL portion of the local submatrix on any given processor 4039 is the submatrix corresponding to the rows and columns m,n 4040 corresponding to the given processor. i.e diagonal matrix on 4041 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4042 etc. The remaining portion of the local submatrix [m x (N-n)] 4043 constitute the OFF-DIAGONAL portion. The example below better 4044 illustrates this concept. 4045 4046 For a square global matrix we define each processor's diagonal portion 4047 to be its local rows and the corresponding columns (a square submatrix); 4048 each processor's off-diagonal portion encompasses the remainder of the 4049 local matrix (a rectangular submatrix). 
4050 4051 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4052 4053 When calling this routine with a single process communicator, a matrix of 4054 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4055 type of communicator, use the construction mechanism: 4056 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4057 4058 By default, this format uses inodes (identical nodes) when possible. 4059 We search for consecutive rows with the same nonzero structure, thereby 4060 reusing matrix information to achieve increased efficiency. 4061 4062 Options Database Keys: 4063 + -mat_no_inode - Do not use inodes 4064 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4065 - -mat_aij_oneindex - Internally use indexing starting at 1 4066 rather than 0. Note that when calling MatSetValues(), 4067 the user still MUST index entries starting at 0! 4068 4069 4070 Example usage: 4071 4072 Consider the following 8x8 matrix with 34 non-zero values, that is 4073 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4074 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4075 as follows: 4076 4077 .vb 4078 1 2 0 | 0 3 0 | 0 4 4079 Proc0 0 5 6 | 7 0 0 | 8 0 4080 9 0 10 | 11 0 0 | 12 0 4081 ------------------------------------- 4082 13 0 14 | 15 16 17 | 0 0 4083 Proc1 0 18 0 | 19 20 21 | 0 0 4084 0 0 0 | 22 23 0 | 24 0 4085 ------------------------------------- 4086 Proc2 25 26 27 | 0 0 28 | 29 0 4087 30 0 0 | 31 32 33 | 0 34 4088 .ve 4089 4090 This can be represented as a collection of submatrices as: 4091 4092 .vb 4093 A B C 4094 D E F 4095 G H I 4096 .ve 4097 4098 Where the submatrices A,B,C are owned by proc0, D,E,F are 4099 owned by proc1, G,H,I are owned by proc2. 4100 4101 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4102 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
The 'M','N' parameters are 8,8, and have the same values on all procs.

    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
    matrix, and [DF] as another SeqAIJ matrix.

    When d_nz, o_nz parameters are specified, d_nz storage elements are
    allocated for every row of the local diagonal submatrix, and o_nz
    storage locations are allocated for every row of the OFF-DIAGONAL submat.
    One way to choose d_nz and o_nz is to use the max nonzeros per local
    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
    In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
    for proc2. i.e we are using 12+15+10=37 storage locations to store
    34 values.

    When d_nnz, o_nnz parameters are specified, the storage is specified
    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
    In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
    Here the space allocated is the sum of all the above values i.e 34, and
    hence pre-allocation is perfect.
4138 4139 Level: intermediate 4140 4141 .keywords: matrix, aij, compressed row, sparse, parallel 4142 4143 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4144 MPIAIJ, MatCreateMPIAIJWithArrays() 4145 @*/ 4146 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4147 { 4148 PetscErrorCode ierr; 4149 PetscMPIInt size; 4150 4151 PetscFunctionBegin; 4152 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4153 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4154 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4155 if (size > 1) { 4156 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4157 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4158 } else { 4159 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4160 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4161 } 4162 PetscFunctionReturn(0); 4163 } 4164 4165 #undef __FUNCT__ 4166 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4167 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4168 { 4169 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4170 4171 PetscFunctionBegin; 4172 *Ad = a->A; 4173 *Ao = a->B; 4174 *colmap = a->garray; 4175 PetscFunctionReturn(0); 4176 } 4177 4178 #undef __FUNCT__ 4179 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4180 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4181 { 4182 PetscErrorCode ierr; 4183 PetscInt i; 4184 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4185 4186 PetscFunctionBegin; 4187 if (coloring->ctype == IS_COLORING_GLOBAL) { 4188 ISColoringValue *allcolors,*colors; 4189 ISColoring ocoloring; 4190 4191 /* set coloring for diagonal portion */ 4192 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4193 4194 /* set coloring for off-diagonal portion */ 4195 ierr = 
ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4196 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4197 for (i=0; i<a->B->cmap->n; i++) { 4198 colors[i] = allcolors[a->garray[i]]; 4199 } 4200 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4201 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4202 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4203 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4204 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4205 ISColoringValue *colors; 4206 PetscInt *larray; 4207 ISColoring ocoloring; 4208 4209 /* set coloring for diagonal portion */ 4210 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4211 for (i=0; i<a->A->cmap->n; i++) { 4212 larray[i] = i + A->cmap->rstart; 4213 } 4214 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4215 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4216 for (i=0; i<a->A->cmap->n; i++) { 4217 colors[i] = coloring->colors[larray[i]]; 4218 } 4219 ierr = PetscFree(larray);CHKERRQ(ierr); 4220 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4221 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4222 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4223 4224 /* set coloring for off-diagonal portion */ 4225 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4226 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4227 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4228 for (i=0; i<a->B->cmap->n; i++) { 4229 colors[i] = coloring->colors[larray[i]]; 4230 } 4231 ierr = PetscFree(larray);CHKERRQ(ierr); 4232 ierr = 
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    /* hand the ghost-column coloring to the off-diagonal (B) block */
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forward ADIC-computed values to both local blocks of the parallel matrix:
   the diagonal (A) and off-diagonal (B) SeqAIJ parts. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forward ADIFOR-computed values (nl values per entry) to both local blocks. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeSymbolic"
/* MatMergeSymbolic - symbolic phase of MatMerge(): creates the parallel MPIAIJ
   matrix *outmat and preallocates it from the row structure of the local
   sequential matrix inmat.

   Note: inmat must be SeqAIJ, since MatGetRow_SeqAIJ() is called directly. */
PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE){
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* prefix-sum of local row counts gives this process's global row offset */
  ierr = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  /* count diagonal/off-diagonal nonzeros per row (dnz/onz declared by the macro) */
  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr =
MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMergeNumeric"
/* MatMergeNumeric - numeric phase of MatMerge(): copies the values of the
   local sequential matrix inmat into the already-preallocated parallel
   matrix outmat, then DESTROYS inmat (see MatDestroy(&inmat) below). */
PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;   /* local row i becomes global row rstart+i */
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);   /* caller's sequential matrix is consumed here */
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
   MatMerge - Creates a single large PETSc matrix by concatenating sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicators the parallel matrix will live on
.  inmat - the input sequential matrices
.
n - number of local columns (or PETSC_DECIDE) 4336 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4337 4338 Output Parameter: 4339 . outmat - the parallel matrix generated 4340 4341 Level: advanced 4342 4343 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4344 4345 @*/ 4346 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4347 { 4348 PetscErrorCode ierr; 4349 4350 PetscFunctionBegin; 4351 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4352 if (scall == MAT_INITIAL_MATRIX){ 4353 ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4354 } 4355 ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4356 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4357 PetscFunctionReturn(0); 4358 } 4359 4360 #undef __FUNCT__ 4361 #define __FUNCT__ "MatFileSplit" 4362 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4363 { 4364 PetscErrorCode ierr; 4365 PetscMPIInt rank; 4366 PetscInt m,N,i,rstart,nnz; 4367 size_t len; 4368 const PetscInt *indx; 4369 PetscViewer out; 4370 char *name; 4371 Mat B; 4372 const PetscScalar *values; 4373 4374 PetscFunctionBegin; 4375 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4376 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4377 /* Should this be the type of the diagonal block of A? 
*/ 4378 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4379 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4380 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4381 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4382 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4383 for (i=0;i<m;i++) { 4384 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4385 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4386 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4387 } 4388 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4389 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4390 4391 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4392 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4393 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4394 sprintf(name,"%s.%d",outfile,rank); 4395 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4396 ierr = PetscFree(name); 4397 ierr = MatView(B,out);CHKERRQ(ierr); 4398 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4399 ierr = MatDestroy(&B);CHKERRQ(ierr); 4400 PetscFunctionReturn(0); 4401 } 4402 4403 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4404 #undef __FUNCT__ 4405 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4406 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4407 { 4408 PetscErrorCode ierr; 4409 Mat_Merge_SeqsToMPI *merge; 4410 PetscContainer container; 4411 4412 PetscFunctionBegin; 4413 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4414 if (container) { 4415 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4421 ierr = 
PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4427 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4428 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4429 ierr = PetscFree(merge);CHKERRQ(ierr); 4430 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4431 } 4432 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 #include <../src/mat/utils/freespace.h> 4437 #include <petscbt.h> 4438 4439 #undef __FUNCT__ 4440 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 4441 /*@C 4442 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 4443 matrices from each processor 4444 4445 Collective on MPI_Comm 4446 4447 Input Parameters: 4448 + comm - the communicators the parallel matrix will live on 4449 . seqmat - the input sequential matrices 4450 . m - number of local rows (or PETSC_DECIDE) 4451 . n - number of local columns (or PETSC_DECIDE) 4452 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4453 4454 Output Parameter: 4455 . mpimat - the parallel matrix generated 4456 4457 Level: advanced 4458 4459 Notes: 4460 The dimensions of the sequential matrix in each processor MUST be the same. 4461 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4462 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
@*/
/* Numeric phase: assumes MatMerge_SeqsToMPISymbolic() already ran and attached
   the Mat_Merge_SeqsToMPI support structure to mpimat (queried below). Each
   process sends the values of the rows it does NOT own to their owners, then
   accumulates its local rows plus all received contributions into mpimat. */
PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm=((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the communication/structure data saved by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* send the values of all rows owned by [proc] in one contiguous message */
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend)
{ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  /* ba_i is a dense scratch row large enough for any merged row (N columns) */
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;  /* global index of this local row */
    bj_i = bj+bi[i];          /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba.
       NOTE(review): this scan assumes the row's column indices appear in the
       same (increasing) order in both aj and bj_i — confirm against the
       symbolic phase, which builds bj from the same sources. */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr =
MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r[0] owns the contiguous receive buffer; abuf_r is the pointer array */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/* Symbolic phase: exchanges the ij-structure of the off-process rows of each
   seqmat, computes the merged row structure bi/bj and the preallocation of
   the parallel matrix, and attaches all reusable communication data to the
   result in a "MatMergeSeqsToMPI" container (freed by
   MatDestroy_MPIAIJ_SeqsToMPI). */
PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr =
PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  /* m/M may have been PETSC_DECIDE; re-read the resolved values */
  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;   /* our own rows are merged locally, not sent */
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* i-structure message carries only the nonempty rows */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr
 = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list for merging sorted column indices */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space
       (note: '&current_space' restored here from mis-encoded source text) */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr =
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;   /* frees the merge struct too */
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/* Driver: symbolic phase only for MAT_INITIAL_MATRIX, then the numeric phase. */
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
          mlocal rows and n columns.
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* each merged row has all diagonal-block plus all off-diagonal-block entries */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: global columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local columns shifted to global by cstart */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] =
*aa++;
      }
      /* off-diagonal portion of A: remaining global columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure already exists: only refresh the values in the same order */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default column set: the nonzero columns, kept in ascending global order:
       ghost columns below the diagonal block, then the diagonal block, then
       ghost columns above it (garray is the sorted ghost-column map) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices() expects an existing Mat array when reusing */
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for A*B-style products */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* rows of B to fetch = nonzero columns of local A, in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr =
ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5071 } else { 5072 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5073 isrowb = *rowb; iscolb = *colb; 5074 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5075 bseq[0] = *B_seq; 5076 } 5077 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5078 *B_seq = bseq[0]; 5079 ierr = PetscFree(bseq);CHKERRQ(ierr); 5080 if (!rowb){ 5081 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5082 } else { 5083 *rowb = isrowb; 5084 } 5085 if (!colb){ 5086 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5087 } else { 5088 *colb = iscolb; 5089 } 5090 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5091 PetscFunctionReturn(0); 5092 } 5093 5094 #undef __FUNCT__ 5095 #define __FUNCT__ "MatGetBrowsOfAoCols" 5096 /*@C 5097 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5098 of the OFF-DIAGONAL portion of local A 5099 5100 Collective on Mat 5101 5102 Input Parameters: 5103 + A,B - the matrices in mpiaij format 5104 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5105 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5106 . 
startsj_r - similar to startsj for receives 5107 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5108 5109 Output Parameter: 5110 + B_oth - the sequential matrix generated 5111 5112 Level: developer 5113 5114 @*/ 5115 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5116 { 5117 VecScatter_MPI_General *gen_to,*gen_from; 5118 PetscErrorCode ierr; 5119 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5120 Mat_SeqAIJ *b_oth; 5121 VecScatter ctx=a->Mvctx; 5122 MPI_Comm comm=((PetscObject)ctx)->comm; 5123 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5124 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5125 PetscScalar *rvalues,*svalues; 5126 MatScalar *b_otha,*bufa,*bufA; 5127 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5128 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5129 MPI_Status *sstatus,rstatus; 5130 PetscMPIInt jj; 5131 PetscInt *cols,sbs,rbs; 5132 PetscScalar *vals; 5133 5134 PetscFunctionBegin; 5135 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5136 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5137 } 5138 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5139 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5140 5141 gen_to = (VecScatter_MPI_General*)ctx->todata; 5142 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5143 rvalues = gen_from->values; /* holds the length of receiving row */ 5144 svalues = gen_to->values; /* holds the length of sending row */ 5145 nrecvs = gen_from->n; 5146 nsends = gen_to->n; 5147 5148 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5149 srow = gen_to->indices; /* local row index to be sent */ 5150 sstarts = 
gen_to->starts; 5151 sprocs = gen_to->procs; 5152 sstatus = gen_to->sstatus; 5153 sbs = gen_to->bs; 5154 rstarts = gen_from->starts; 5155 rprocs = gen_from->procs; 5156 rbs = gen_from->bs; 5157 5158 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5159 if (scall == MAT_INITIAL_MATRIX){ 5160 /* i-array */ 5161 /*---------*/ 5162 /* post receives */ 5163 for (i=0; i<nrecvs; i++){ 5164 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5165 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5166 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5167 } 5168 5169 /* pack the outgoing message */ 5170 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5171 sstartsj[0] = 0; rstartsj[0] = 0; 5172 len = 0; /* total length of j or a array to be sent */ 5173 k = 0; 5174 for (i=0; i<nsends; i++){ 5175 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5176 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5177 for (j=0; j<nrows; j++) { 5178 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5179 for (l=0; l<sbs; l++){ 5180 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5181 rowlen[j*sbs+l] = ncols; 5182 len += ncols; 5183 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5184 } 5185 k++; 5186 } 5187 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5188 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5189 } 5190 /* recvs and sends of i-array are completed */ 5191 i = nrecvs; 5192 while (i--) { 5193 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5194 } 5195 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5196 5197 /* allocate buffers for sending j and a arrays */ 5198 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5199 ierr = 
PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5200 5201 /* create i-array of B_oth */ 5202 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5203 b_othi[0] = 0; 5204 len = 0; /* total length of j or a array to be received */ 5205 k = 0; 5206 for (i=0; i<nrecvs; i++){ 5207 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5208 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5209 for (j=0; j<nrows; j++) { 5210 b_othi[k+1] = b_othi[k] + rowlen[j]; 5211 len += rowlen[j]; k++; 5212 } 5213 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5214 } 5215 5216 /* allocate space for j and a arrrays of B_oth */ 5217 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5218 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5219 5220 /* j-array */ 5221 /*---------*/ 5222 /* post receives of j-array */ 5223 for (i=0; i<nrecvs; i++){ 5224 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5225 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5226 } 5227 5228 /* pack the outgoing message j-array */ 5229 k = 0; 5230 for (i=0; i<nsends; i++){ 5231 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5232 bufJ = bufj+sstartsj[i]; 5233 for (j=0; j<nrows; j++) { 5234 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5235 for (ll=0; ll<sbs; ll++){ 5236 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5237 for (l=0; l<ncols; l++){ 5238 *bufJ++ = cols[l]; 5239 } 5240 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5241 } 5242 } 5243 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5244 } 5245 5246 /* recvs and sends of j-array are completed */ 5247 i = nrecvs; 5248 while (i--) { 5249 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5250 } 5251 if 
(nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5252 } else if (scall == MAT_REUSE_MATRIX){ 5253 sstartsj = *startsj; 5254 rstartsj = *startsj_r; 5255 bufa = *bufa_ptr; 5256 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5257 b_otha = b_oth->a; 5258 } else { 5259 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5260 } 5261 5262 /* a-array */ 5263 /*---------*/ 5264 /* post receives of a-array */ 5265 for (i=0; i<nrecvs; i++){ 5266 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5267 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5268 } 5269 5270 /* pack the outgoing message a-array */ 5271 k = 0; 5272 for (i=0; i<nsends; i++){ 5273 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5274 bufA = bufa+sstartsj[i]; 5275 for (j=0; j<nrows; j++) { 5276 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5277 for (ll=0; ll<sbs; ll++){ 5278 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5279 for (l=0; l<ncols; l++){ 5280 *bufA++ = vals[l]; 5281 } 5282 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5283 } 5284 } 5285 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5286 } 5287 /* recvs and sends of a-array are completed */ 5288 i = nrecvs; 5289 while (i--) { 5290 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5291 } 5292 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5293 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5294 5295 if (scall == MAT_INITIAL_MATRIX){ 5296 /* put together the new matrix */ 5297 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5298 5299 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5300 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5301 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5302 b_oth->free_a = PETSC_TRUE; 5303 b_oth->free_ij = PETSC_TRUE; 5304 b_oth->nonew = 0; 5305 5306 ierr = PetscFree(bufj);CHKERRQ(ierr); 5307 if (!startsj || !bufa_ptr){ 5308 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5309 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5310 } else { 5311 *startsj = sstartsj; 5312 *startsj_r = rstartsj; 5313 *bufa_ptr = bufa; 5314 } 5315 } 5316 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5317 PetscFunctionReturn(0); 5318 } 5319 5320 #undef __FUNCT__ 5321 #define __FUNCT__ "MatGetCommunicationStructs" 5322 /*@C 5323 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5324 5325 Not Collective 5326 5327 Input Parameters: 5328 . A - The matrix in mpiaij format 5329 5330 Output Parameter: 5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5332 . 
  colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  /* all three output pointers are required (validated below), so the
     if-guards before each assignment are belt-and-braces only */
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  /* hand out borrowed references; caller must NOT destroy them */
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                       p                          p
        (      )              (      )                   (      )
      m (  A   )    *     n   (  B   )     =         m   (  C   )
        (      )              (      )                   (      )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  /* form C = (B' * A')' via explicit transposes; the two temporaries At,Bt
     are destroyed as soon as the product is formed */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  /* transpose the product back into the caller-supplied C */
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/* Creates and assembles the (dense) product matrix C with the correct
   parallel layout for C = A*B; no numeric values are computed here. */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver: symbolic phase on first call (MAT_INITIAL_MATRIX), then numeric. */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
/* optional external direct-solver factorizations, compiled in on demand */
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5438 #endif 5439 EXTERN_C_END 5440 5441 /*MC 5442 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5443 5444 Options Database Keys: 5445 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5446 5447 Level: beginner 5448 5449 .seealso: MatCreateMPIAIJ() 5450 M*/ 5451 5452 EXTERN_C_BEGIN 5453 #undef __FUNCT__ 5454 #define __FUNCT__ "MatCreate_MPIAIJ" 5455 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5456 { 5457 Mat_MPIAIJ *b; 5458 PetscErrorCode ierr; 5459 PetscMPIInt size; 5460 5461 PetscFunctionBegin; 5462 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5463 5464 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5465 B->data = (void*)b; 5466 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5467 B->rmap->bs = 1; 5468 B->assembled = PETSC_FALSE; 5469 5470 B->insertmode = NOT_SET_VALUES; 5471 b->size = size; 5472 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5473 5474 /* build cache for off array entries formed */ 5475 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5476 b->donotstash = PETSC_FALSE; 5477 b->colmap = 0; 5478 b->garray = 0; 5479 b->roworiented = PETSC_TRUE; 5480 5481 /* stuff used for matrix vector multiply */ 5482 b->lvec = PETSC_NULL; 5483 b->Mvctx = PETSC_NULL; 5484 5485 /* stuff for MatGetRow() */ 5486 b->rowindices = 0; 5487 b->rowvalues = 0; 5488 b->getrowactive = PETSC_FALSE; 5489 5490 #if defined(PETSC_HAVE_SPOOLES) 5491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5492 "MatGetFactor_mpiaij_spooles", 5493 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5494 #endif 5495 #if defined(PETSC_HAVE_MUMPS) 5496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5497 "MatGetFactor_aij_mumps", 5498 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5499 #endif 5500 #if defined(PETSC_HAVE_PASTIX) 5501 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5502 "MatGetFactor_mpiaij_pastix", 5503 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5504 #endif 5505 #if defined(PETSC_HAVE_SUPERLU_DIST) 5506 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5507 "MatGetFactor_mpiaij_superlu_dist", 5508 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5509 #endif 5510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5511 "MatStoreValues_MPIAIJ", 5512 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5513 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5514 "MatRetrieveValues_MPIAIJ", 5515 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5516 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5517 "MatGetDiagonalBlock_MPIAIJ", 5518 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5519 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5520 "MatIsTranspose_MPIAIJ", 5521 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5522 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5523 "MatMPIAIJSetPreallocation_MPIAIJ", 5524 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5526 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5527 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5528 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5529 "MatDiagonalScaleLocal_MPIAIJ", 5530 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5531 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5532 "MatConvert_MPIAIJ_MPIAIJPERM", 5533 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5534 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5535 "MatConvert_MPIAIJ_MPIAIJCRL", 5536 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5537 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5538 "MatConvert_MPIAIJ_MPISBAIJ", 5539 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5540 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5541 "MatMatMult_MPIDense_MPIAIJ", 5542 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5543 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5544 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5545 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5546 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5547 "MatMatMultNumeric_MPIDense_MPIAIJ", 5548 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5549 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5550 PetscFunctionReturn(0); 5551 } 5552 EXTERN_C_END 5553 5554 #undef __FUNCT__ 5555 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5556 /*@ 5557 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5558 and "off-diagonal" part of the matrix in CSR format. 5559 5560 Collective on MPI_Comm 5561 5562 Input Parameters: 5563 + comm - MPI communicator 5564 . m - number of local rows (Cannot be PETSC_DECIDE) 5565 . n - This value should be the same as the local size used in creating the 5566 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5567 calculated if N is given) For square matrices n is almost always m. 5568 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5569 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5570 . i - row indices for "diagonal" portion of matrix 5571 . j - column indices 5572 . a - matrix values 5573 . oi - row indices for "off-diagonal" portion of matrix 5574 . oj - column indices 5575 - oa - matrix values 5576 5577 Output Parameter: 5578 . 
mat - the matrix 5579 5580 Level: advanced 5581 5582 Notes: 5583 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5584 must free the arrays once the matrix has been destroyed and not before. 5585 5586 The i and j indices are 0 based 5587 5588 See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5589 5590 This sets local rows and cannot be used to set off-processor values. 5591 5592 You cannot later use MatSetValues() to change values in this matrix. 5593 5594 .keywords: matrix, aij, compressed row, sparse, parallel 5595 5596 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5597 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays() 5598 @*/ 5599 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5600 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5601 { 5602 PetscErrorCode ierr; 5603 Mat_MPIAIJ *maij; 5604 5605 PetscFunctionBegin; 5606 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5607 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5608 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5609 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5610 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5611 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5612 maij = (Mat_MPIAIJ*) (*mat)->data; 5613 maij->donotstash = PETSC_TRUE; 5614 (*mat)->preallocated = PETSC_TRUE; 5615 5616 ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr); 5617 ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr); 5618 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5619 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5620 5621 ierr = 
MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5622 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5623 5624 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5625 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5626 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5627 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5628 5629 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5630 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 PetscFunctionReturn(0); 5632 } 5633 5634 /* 5635 Special version for direct calls from Fortran 5636 */ 5637 #include <private/fortranimpl.h> 5638 5639 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5640 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5641 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5642 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5643 #endif 5644 5645 /* Change these macros so can be used in void function */ 5646 #undef CHKERRQ 5647 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5648 #undef SETERRQ2 5649 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5650 #undef SETERRQ 5651 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5652 5653 EXTERN_C_BEGIN 5654 #undef __FUNCT__ 5655 #define __FUNCT__ "matsetvaluesmpiaij_" 5656 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5657 { 5658 Mat mat = *mmat; 5659 PetscInt m = *mm, n = *mn; 5660 InsertMode addv = *maddv; 5661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5662 PetscScalar value; 5663 PetscErrorCode ierr; 5664 5665 ierr = MatPreallocated(mat);CHKERRQ(ierr); 5666 if (mat->insertmode == NOT_SET_VALUES) { 5667 mat->insertmode = addv; 5668 } 5669 #if defined(PETSC_USE_DEBUG) 5670 else if (mat->insertmode != addv) { 5671 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5672 } 5673 #endif 5674 { 5675 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5676 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5677 PetscBool roworiented = aij->roworiented; 5678 5679 /* Some Variables required in the macro */ 5680 Mat A = aij->A; 5681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5682 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5683 MatScalar *aa = a->a; 5684 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5685 Mat B = aij->B; 5686 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5687 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5688 MatScalar *ba = b->a; 5689 5690 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5691 PetscInt nonew = a->nonew; 5692 MatScalar *ap1,*ap2; 5693 5694 PetscFunctionBegin; 5695 for (i=0; i<m; i++) { 5696 if (im[i] < 0) continue; 5697 #if defined(PETSC_USE_DEBUG) 5698 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5699 #endif 5700 if (im[i] >= rstart && im[i] < rend) { 5701 row = im[i] - rstart; 5702 lastcol1 = -1; 5703 rp1 = aj + ai[row]; 5704 ap1 = aa + ai[row]; 5705 rmax1 = aimax[row]; 5706 nrow1 = ailen[row]; 5707 low1 = 0; 5708 high1 = nrow1; 5709 lastcol2 = -1; 5710 rp2 = bj + bi[row]; 5711 ap2 = ba + bi[row]; 5712 rmax2 = bimax[row]; 5713 nrow2 = bilen[row]; 5714 low2 = 0; 5715 high2 = nrow2; 5716 5717 for (j=0; j<n; j++) { 5718 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5719 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5720 if (in[j] >= cstart && in[j] < cend){ 5721 col = in[j] - cstart; 5722 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5723 } else if (in[j] < 0) continue; 5724 #if 
defined(PETSC_USE_DEBUG) 5725 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5726 #endif 5727 else { 5728 if (mat->was_assembled) { 5729 if (!aij->colmap) { 5730 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5731 } 5732 #if defined (PETSC_USE_CTABLE) 5733 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5734 col--; 5735 #else 5736 col = aij->colmap[in[j]] - 1; 5737 #endif 5738 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5739 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5740 col = in[j]; 5741 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5742 B = aij->B; 5743 b = (Mat_SeqAIJ*)B->data; 5744 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5745 rp2 = bj + bi[row]; 5746 ap2 = ba + bi[row]; 5747 rmax2 = bimax[row]; 5748 nrow2 = bilen[row]; 5749 low2 = 0; 5750 high2 = nrow2; 5751 bm = aij->B->rmap->n; 5752 ba = b->a; 5753 } 5754 } else col = in[j]; 5755 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5756 } 5757 } 5758 } else { 5759 if (!aij->donotstash) { 5760 if (roworiented) { 5761 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5762 } else { 5763 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5764 } 5765 } 5766 } 5767 }} 5768 PetscFunctionReturnVoid(); 5769 } 5770 EXTERN_C_END 5771 5772