#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
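/*
   Minimal usage sketch of the recommendation above: call both preallocation routines so the same
   code works on one process and on many.  This is illustrative only and not part of this file's
   implementation; comm, M, N, d_nz and o_nz are assumed to be supplied by the caller.

     Mat  A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);                  (used on a one-process communicator)
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);  (used otherwise)
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/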
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 if (!rank) { 204 bses[0] = gmat->rmap->bs; 205 bses[1] = gmat->cmap->bs; 206 } 207 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 209 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 210 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 211 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 212 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) { 215 rowners[i] += rowners[i-1]; 216 } 217 rstart = rowners[rank]; 218 rend = rowners[rank+1]; 219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 220 if (!rank) { 221 gmata = (Mat_SeqAIJ*) gmat->data; 222 /* send row lengths to all processors */ 223 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 224 for (i=1; i<size; i++) { 225 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 226 } 227 /* determine number diagonal and off-diagonal counts */ 228 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 230 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 264 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 265 jj = 0; 266 for (i=0; i<m; i++) { 267 for (j=0; j<dlens[i]; j++) { 268 if (gmataj[jj] < rstart) ld[i]++; 269 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 270 jj++; 271 } 272 } 273 /* receive numerical values */ 274 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 276 } 277 /* set preallocation */ 278 for (i=0; i<m; i++) { 279 dlens[i] -= olens[i]; 280 } 281 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 282 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 283 284 for (i=0; i<m; i++) { 285 dlens[i] += olens[i]; 286 } 287 cnt = 0; 288 for (i=0; i<m; i++) { 289 row = rstart + i; 290 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 291 cnt += dlens[i]; 292 } 293 if (rank) { 294 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 295 } 296 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 297 ierr = PetscFree(rowners);CHKERRQ(ierr); 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 *inmat = mat; 300 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 301 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 302 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 303 mat = *inmat; 304 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 305 if (!rank) { 306 /* send numerical values to other processes */ 307 gmata = (Mat_SeqAIJ*) gmat->data; 308 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 309 gmataa = gmata->a; 310 for (i=1; i<size; i++) { 311 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 312 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 313 } 314 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 315 } else { 316 /* receive numerical values from process 0*/ 317 nz = Ad->nz + Ao->nz; 318 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 319 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 320 } 321 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 322 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 323 ad = Ad->a; 324 ao = Ao->a; 325 if (mat->rmap->n) { 326 i = 0; 327 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 328 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 329 } 330 for (i=1; i<mat->rmap->n; i++) { 331 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 i--; 335 if (mat->rmap->n) { 336 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 337 } 338 if (rank) { 339 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 340 } 341 } 342 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 CHKMEMQ; 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal part of the local 351 
storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined (PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++) { 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 373 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 374 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 375 #endif 376 PetscFunctionReturn(0); 377 } 378 379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 380 { \ 381 if (col <= lastcol1) low1 = 0; \ 382 else high1 = nrow1; \ 383 lastcol1 = col;\ 384 while (high1-low1 > 5) { \ 385 t = (low1+high1)/2; \ 386 if (rp1[t] > col) high1 = t; \ 387 else low1 = t; \ 388 } \ 389 for (_i=low1; _i<high1; _i++) { \ 390 if (rp1[_i] > col) break; \ 391 if (rp1[_i] == col) { \ 392 if (addv == ADD_VALUES) ap1[_i] += value; \ 393 else ap1[_i] = value; \ 394 goto a_noinsert; \ 395 } \ 396 } \ 397 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 398 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 399 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 400 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 401 N = nrow1++ - 1; a->nz++; high1++; \ 402 /* shift up all the later entries in this row */ \ 403 for (ii=N; ii>=_i; ii--) { \ 404 rp1[ii+1] = rp1[ii]; \ 405 ap1[ii+1] = ap1[ii]; \ 406 } \ 407 rp1[_i] = col; \ 408 ap1[_i] = value; \ 409 a_noinsert: ; \ 410 ailen[row] = nrow1; \ 411 } 412 413 414 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 415 { \ 416 if (col <= lastcol2) low2 = 0; \ 417 else high2 = nrow2; \ 418 lastcol2 = col; \ 419 while (high2-low2 > 5) { \ 420 t = (low2+high2)/2; \ 421 if (rp2[t] > col) high2 = t; \ 422 else low2 = t; \ 423 } \ 424 for (_i=low2; _i<high2; _i++) { \ 425 if (rp2[_i] > col) break; \ 426 if (rp2[_i] == col) { \ 427 if (addv == ADD_VALUES) ap2[_i] += value; \ 428 else ap2[_i] = value; \ 429 goto b_noinsert; \ 430 } \ 431 } \ 432 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 433 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 434 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 435 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 436 N = nrow2++ - 1; b->nz++; high2++; \ 437 /* shift up all the later entries in this row */ \ 438 for (ii=N; ii>=_i; ii--) { \ 439 rp2[ii+1] = rp2[ii]; \ 440 ap2[ii+1] = ap2[ii]; \ 441 } \ 442 rp2[_i] = col; \ 443 ap2[_i] = value; \ 444 b_noinsert: ; \ 445 bilen[row] = nrow2; \ 446 } 447 448 #undef 
__FUNCT__ 449 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 450 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 451 { 452 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 454 PetscErrorCode ierr; 455 PetscInt l,*garray = mat->garray,diag; 456 457 PetscFunctionBegin; 458 /* code only works for square matrices A */ 459 460 /* find size of row to the left of the diagonal part */ 461 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 462 row = row - diag; 463 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 464 if (garray[b->j[b->i[row]+l]] > diag) break; 465 } 466 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 467 468 /* diagonal part */ 469 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* right of diagonal part */ 472 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatSetValues_MPIAIJ" 478 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 479 { 480 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 481 PetscScalar value; 482 PetscErrorCode ierr; 483 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 484 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 485 PetscBool roworiented = aij->roworiented; 486 487 /* Some Variables required in the macro */ 488 Mat A = aij->A; 489 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 490 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 491 MatScalar *aa = a->a; 492 PetscBool ignorezeroentries = a->ignorezeroentries; 493 Mat B = aij->B; 494 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 495 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 496 MatScalar *ba = b->a; 497 498 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 499 PetscInt nonew; 500 MatScalar *ap1,*ap2; 501 502 PetscFunctionBegin; 503 if (v) PetscValidScalarPointer(v,6); 504 for (i=0; i<m; i++) { 505 if (im[i] < 0) continue; 506 #if defined(PETSC_USE_DEBUG) 507 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 508 #endif 509 if (im[i] >= rstart && im[i] < rend) { 510 row = im[i] - rstart; 511 lastcol1 = -1; 512 rp1 = aj + ai[row]; 513 ap1 = aa + ai[row]; 514 rmax1 = aimax[row]; 515 nrow1 = ailen[row]; 516 low1 = 0; 517 high1 = nrow1; 518 lastcol2 = -1; 519 rp2 = bj + bi[row]; 520 ap2 = ba + bi[row]; 521 rmax2 = bimax[row]; 522 nrow2 = bilen[row]; 523 low2 = 0; 524 high2 = nrow2; 525 526 for (j=0; j<n; j++) { 527 if (v) { 528 if (roworiented) value = v[i*n+j]; 529 else value = v[i+j*m]; 530 } else value = 0.0; 531 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 532 if (in[j] >= cstart && in[j] < cend) { 533 col = in[j] - cstart; 534 nonew = a->nonew; 535 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 536 } else if (in[j] < 0) continue; 537 #if defined(PETSC_USE_DEBUG) 538 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 539 #endif 540 else { 541 if (mat->was_assembled) { 542 if (!aij->colmap) { 543 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 
544 } 545 #if defined (PETSC_USE_CTABLE) 546 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 547 col--; 548 #else 549 col = aij->colmap[in[j]] - 1; 550 #endif 551 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 552 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 553 col = in[j]; 554 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 555 B = aij->B; 556 b = (Mat_SeqAIJ*)B->data; 557 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 558 rp2 = bj + bi[row]; 559 ap2 = ba + bi[row]; 560 rmax2 = bimax[row]; 561 nrow2 = bilen[row]; 562 low2 = 0; 563 high2 = nrow2; 564 bm = aij->B->rmap->n; 565 ba = b->a; 566 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 567 } else col = in[j]; 568 nonew = b->nonew; 569 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 570 } 571 } 572 } else { 573 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 574 if (!aij->donotstash) { 575 mat->assembled = PETSC_FALSE; 576 if (roworiented) { 577 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 578 } else { 579 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 580 } 581 } 582 } 583 } 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatGetValues_MPIAIJ" 589 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 590 { 591 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 592 PetscErrorCode ierr; 593 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 594 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 595 596 PetscFunctionBegin; 597 for (i=0; i<m; i++) { 598 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 599 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 600 if (idxm[i] >= rstart && idxm[i] < rend) { 601 row = idxm[i] - rstart; 602 for (j=0; j<n; j++) { 603 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 604 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 605 if (idxn[j] >= cstart && idxn[j] < cend) { 606 col = idxn[j] - cstart; 607 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 608 } else { 609 if (!aij->colmap) { 610 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 611 } 612 #if defined (PETSC_USE_CTABLE) 613 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 614 col --; 615 #else 616 col = aij->colmap[idxn[j]] - 1; 617 #endif 618 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 619 else { 620 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 621 } 622 } 623 } 624 } else { 625 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 626 } 627 } 628 PetscFunctionReturn(0); 629 } 630 631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 635 PetscErrorCode 
MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 636 { 637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 638 PetscErrorCode ierr; 639 PetscInt nstash,reallocs; 640 InsertMode addv; 641 642 PetscFunctionBegin; 643 if (aij->donotstash || mat->nooffprocentries) { 644 PetscFunctionReturn(0); 645 } 646 647 /* make sure all processors are either in INSERTMODE or ADDMODE */ 648 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 649 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 650 mat->insertmode = addv; /* in case this processor had no cache */ 651 652 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 653 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 654 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 660 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 661 { 662 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 663 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 664 PetscErrorCode ierr; 665 PetscMPIInt n; 666 PetscInt i,j,rstart,ncols,flg; 667 PetscInt *row,*col; 668 PetscBool other_disassembled; 669 PetscScalar *val; 670 InsertMode addv = mat->insertmode; 671 672 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n;) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 682 if (j < n) ncols = j-i; 683 else ncols = n-i; 684 /* Now assemble all these values with a single function call */ 685 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 686 i = j; 687 } 688 } 689 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 690 } 691 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 692 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 693 694 /* determine if any processor has disassembled, if so we must 695 also disassemble ourselfs, in order that we may reassemble. 
*/ 696 /* 697 if nonzero structure of submatrix B cannot change then we know that 698 no processor disassembled thus we can skip this stuff 699 */ 700 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 701 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 702 if (mat->was_assembled && !other_disassembled) { 703 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 704 } 705 } 706 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 707 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 708 } 709 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 710 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 711 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 712 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 713 714 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 715 aij->rowvalues = 0; 716 717 /* used by MatAXPY() */ 718 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 719 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 720 721 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 722 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 728 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 729 { 730 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 731 PetscErrorCode ierr; 732 733 PetscFunctionBegin; 734 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 735 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatZeroRows_MPIAIJ" 741 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 742 { 743 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 744 PetscErrorCode ierr; 745 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 746 PetscInt i,*owners = A->rmap->range; 747 PetscInt *nprocs,j,idx,nsends,row; 748 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 749 PetscInt *rvalues,count,base,slen,*source; 750 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 751 MPI_Comm comm = ((PetscObject)A)->comm; 752 MPI_Request *send_waits,*recv_waits; 753 MPI_Status recv_status,*send_status; 754 const PetscScalar *xx; 755 PetscScalar *bb; 756 #if defined(PETSC_DEBUG) 757 PetscBool found = PETSC_FALSE; 758 #endif 759 760 PetscFunctionBegin; 761 /* first count number of contributors to each processor */ 762 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 763 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 764 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 765 j = 0; 766 for (i=0; i<N; i++) { 767 if (lastidx > (idx = rows[i])) j = 0; 768 lastidx = idx; 769 for (; j<size; j++) { 770 if (idx >= owners[j] && idx < owners[j+1]) { 771 nprocs[2*j]++; 772 nprocs[2*j+1] = 1; 773 owner[i] = j; 774 #if defined(PETSC_DEBUG) 775 found = PETSC_TRUE; 776 #endif 777 break; 778 } 779 } 780 #if defined(PETSC_DEBUG) 781 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 782 found = PETSC_FALSE; 783 #endif 784 } 785 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 786 787 if (A->nooffproczerorows) { 788 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 789 nrecvs = nsends; 790 nmax = N; 791 } 
else { 792 /* inform other processors of number of messages and max length*/ 793 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 794 } 795 796 /* post receives: */ 797 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 798 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 799 for (i=0; i<nrecvs; i++) { 800 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 801 } 802 803 /* do sends: 804 1) starts[i] gives the starting index in svalues for stuff going to 805 the ith processor 806 */ 807 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 808 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 809 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 810 starts[0] = 0; 811 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 812 for (i=0; i<N; i++) { 813 svalues[starts[owner[i]]++] = rows[i]; 814 } 815 816 starts[0] = 0; 817 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 818 count = 0; 819 for (i=0; i<size; i++) { 820 if (nprocs[2*i+1]) { 821 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 822 } 823 } 824 ierr = PetscFree(starts);CHKERRQ(ierr); 825 826 base = owners[rank]; 827 828 /* wait on receives */ 829 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 830 count = nrecvs; slen = 0; 831 while (count) { 832 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 833 /* unpack receives into our local space */ 834 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 835 source[imdex] = recv_status.MPI_SOURCE; 836 lens[imdex] = n; 837 slen += n; 838 count--; 839 } 840 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 841 842 /* move the data into the send scatter */ 843 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 844 count = 0; 845 for (i=0; i<nrecvs; i++) { 846 values = rvalues + i*nmax; 847 for (j=0; j<lens[i]; j++) { 848 lrows[count++] = values[j] - base; 849 } 850 } 851 ierr = PetscFree(rvalues);CHKERRQ(ierr); 852 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 853 ierr = PetscFree(owner);CHKERRQ(ierr); 854 ierr = PetscFree(nprocs);CHKERRQ(ierr); 855 856 /* fix right hand side if needed */ 857 if (x && b) { 858 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 859 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 860 for (i=0; i<slen; i++) { 861 bb[lrows[i]] = diag*xx[lrows[i]]; 862 } 863 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 864 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 865 } 866 /* 867 Zero the required rows. If the "diagonal block" of the matrix 868 is square and the user wishes to set the diagonal we use separate 869 code so that MatSetValues() is not called for each diagonal allocating 870 new memory, thus calling lots of mallocs and slowing things down. 
871 872 */ 873 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 874 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 875 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 876 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 877 } else if (diag != 0.0) { 878 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 879 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 880 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 881 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 } 883 for (i = 0; i < slen; i++) { 884 row = lrows[i] + rstart; 885 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 886 } 887 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 } else { 890 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 891 } 892 ierr = PetscFree(lrows);CHKERRQ(ierr); 893 894 /* wait on sends */ 895 if (nsends) { 896 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 897 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 898 ierr = PetscFree(send_status);CHKERRQ(ierr); 899 } 900 ierr = PetscFree(send_waits);CHKERRQ(ierr); 901 ierr = PetscFree(svalues);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 907 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 908 { 909 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 910 PetscErrorCode ierr; 911 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 912 PetscInt i,*owners = A->rmap->range; 913 PetscInt *nprocs,j,idx,nsends; 914 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 915 PetscInt *rvalues,count,base,slen,*source; 916 PetscInt *lens,*lrows,*values,m; 917 MPI_Comm comm = ((PetscObject)A)->comm; 918 MPI_Request *send_waits,*recv_waits; 919 MPI_Status recv_status,*send_status; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 #if defined(PETSC_DEBUG) 927 PetscBool found = PETSC_FALSE; 928 #endif 929 930 PetscFunctionBegin; 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 starts[0] = 0; 975 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 976 for (i=0; i<N; i++) { 977 svalues[starts[owner[i]]++] = rows[i]; 978 } 979 980 starts[0] = 0; 981 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 982 count = 0; 983 for (i=0; i<size; i++) { 984 if (nprocs[2*i+1]) { 985 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 986 } 987 } 988 ierr = PetscFree(starts);CHKERRQ(ierr); 989 990 base = owners[rank]; 991 992 /* wait on receives */ 993 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 994 count = nrecvs; slen = 0; 995 while (count) { 996 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 997 /* unpack receives into our local space */ 998 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) { 1012 lrows[count++] = values[j] - base; 1013 } 1014 } 1015 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1016 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1017 ierr = PetscFree(owner);CHKERRQ(ierr); 1018 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1019 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1020 1021 /* zero diagonal part of matrix */ 1022 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1023 1024 /* handle off diagonal part of matrix */ 1025 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1026 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1027 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1028 for (i=0; i<slen; i++) { 1029 bb[lrows[i]] = 1; 1030 } 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x) { 1036 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1039 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1040 } 1041 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1042 1043 /* remove zeroed rows of off diagonal matrix */ 1044 ii = aij->i; 1045 for (i=0; i<slen; i++) { 1046 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1047 } 1048 1049 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1050 if (aij->compressedrow.use) { 1051 m = 
aij->compressedrow.nrows; 1052 ii = aij->compressedrow.i; 1053 ridx = aij->compressedrow.rindex; 1054 for (i=0; i<m; i++) { 1055 n = ii[i+1] - ii[i]; 1056 aj = aij->j + ii[i]; 1057 aa = aij->a + ii[i]; 1058 1059 for (j=0; j<n; j++) { 1060 if (PetscAbsScalar(mask[*aj])) { 1061 if (b) bb[*ridx] -= *aa*xx[*aj]; 1062 *aa = 0.0; 1063 } 1064 aa++; 1065 aj++; 1066 } 1067 ridx++; 1068 } 1069 } else { /* do not use compressed row format */ 1070 m = l->B->rmap->n; 1071 for (i=0; i<m; i++) { 1072 n = ii[i+1] - ii[i]; 1073 aj = aij->j + ii[i]; 1074 aa = aij->a + ii[i]; 1075 for (j=0; j<n; j++) { 1076 if (PetscAbsScalar(mask[*aj])) { 1077 if (b) bb[i] -= *aa*xx[*aj]; 1078 *aa = 0.0; 1079 } 1080 aa++; 1081 aj++; 1082 } 1083 } 1084 } 1085 if (x) { 1086 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1087 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1088 } 1089 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1090 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1091 ierr = PetscFree(lrows);CHKERRQ(ierr); 1092 1093 /* wait on sends */ 1094 if (nsends) { 1095 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1096 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1097 ierr = PetscFree(send_status);CHKERRQ(ierr); 1098 } 1099 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1100 ierr = PetscFree(svalues);CHKERRQ(ierr); 1101 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMult_MPIAIJ" 1107 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1108 { 1109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 PetscInt nt; 1112 1113 PetscFunctionBegin; 1114 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1115 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1125 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1137 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1138 { 1139 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1145 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1152 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1153 { 1154 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1155 PetscErrorCode ierr; 1156 PetscBool merged; 1157 1158 PetscFunctionBegin; 1159 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1160 /* do nondiagonal part */ 1161 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1162 if (!merged) { 1163 /* send it on its way */ 1164 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 /* do local part */ 1166 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1167 /* receive remote parts: note this assumes the values are not actually */ 1168 /* added in yy until the next line, */ 1169 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 } else { 1171 /* do local part */ 1172 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1173 /* send it on its way */ 1174 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1175 /* values actually were received in the Begin() but we need to call this nop */ 1176 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 EXTERN_C_BEGIN 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1184 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1185 { 1186 MPI_Comm comm; 1187 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1188 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1189 IS Me,Notme; 1190 PetscErrorCode ierr; 1191 PetscInt M,N,first,last,*notme,i; 1192 PetscMPIInt size; 1193 1194 PetscFunctionBegin; 1195 1196 /* Easy test: symmetric diagonal block */ 1197 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1198 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1199 if (!*f) PetscFunctionReturn(0); 1200 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1201 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1202 if (size == 1) PetscFunctionReturn(0); 1203 1204 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1205 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1206 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1207 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1208 for (i=0; i<first; i++) notme[i] = i; 1209 for (i=last; i<M; i++) notme[i-last+first] = i; 1210 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1211 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1212 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1213 Aoff = Aoffs[0]; 1214 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1215 Boff = Boffs[0]; 1216 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1217 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1218 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1219 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1220 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1221 ierr = PetscFree(notme);CHKERRQ(ierr); 1222 PetscFunctionReturn(0); 1223 } 1224 EXTERN_C_END 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1228 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1229 { 1230 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 /* do nondiagonal part */ 1235 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1236 /* send it on its way */ 1237 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1238 /* do local part */ 1239 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1240 /* receive remote parts */ 1241 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 /* 1246 This only works correctly for square matrices where the subblock A->A is the 1247 diagonal block 1248 */ 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1251 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1252 { 1253 PetscErrorCode ierr; 1254 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1255 1256 PetscFunctionBegin; 1257 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1258 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1259 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatScale_MPIAIJ" 1265 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1266 { 1267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1272 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatDestroy_MPIAIJ" 1278 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1279 { 1280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 #if defined(PETSC_USE_LOG) 1285 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1286 #endif 1287 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1288 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1289 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1290 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1291 #if defined (PETSC_USE_CTABLE) 1292 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 
1293 #else 1294 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1295 #endif 1296 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1297 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1298 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1299 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1300 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1301 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1302 1303 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1317 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1318 { 1319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1320 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1321 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1322 PetscErrorCode ierr; 1323 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1324 int fd; 1325 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1326 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1327 PetscScalar *column_values; 1328 PetscInt message_count,flowcontrolcount; 1329 FILE *file; 1330 1331 PetscFunctionBegin; 1332 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1333 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1334 nz = A->nz + B->nz; 1335 if (!rank) { 1336 header[0] = MAT_FILE_CLASSID; 1337 header[1] = mat->rmap->N; 1338 header[2] = mat->cmap->N; 1339 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1341 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1342 /* get largest number of rows any processor has */ 1343 rlen = mat->rmap->n; 1344 range = mat->rmap->range; 1345 for (i=1; i<size; i++) { 1346 rlen = PetscMax(rlen,range[i+1] - range[i]); 1347 } 1348 } else { 1349 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1350 rlen = mat->rmap->n; 1351 } 1352 1353 /* load up the local row counts */ 1354 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1355 for (i=0; i<mat->rmap->n; i++) { 1356 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1357 } 1358 1359 /* store the row lengths to the file */ 1360 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1361 if (!rank) { 1362 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1363 for (i=1; i<size; i++) { 1364 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1365 rlen = range[i+1] - range[i]; 1366 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1367 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1368 } 1369 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1370 } else { 1371 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1372 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1373 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1374 } 1375 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1376 1377 /* load up the local column indices */ 1378 nzmax = nz; /* )th processor needs space a largest processor needs */ 1379 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1380 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1381 cnt = 0; 1382 for (i=0; i<mat->rmap->n; i++) { 1383 for (j=B->i[i]; j<B->i[i+1]; j++) { 1384 if ( (col = garray[B->j[j]]) > cstart) break; 1385 column_indices[cnt++] = col; 1386 } 1387 for (k=A->i[i]; k<A->i[i+1]; k++) { 1388 column_indices[cnt++] = A->j[k] + cstart; 1389 } 1390 for (; j<B->i[i+1]; j++) { 1391 column_indices[cnt++] = garray[B->j[j]]; 1392 } 1393 } 1394 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1395 1396 /* store the column indices to the file */ 1397 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1398 if (!rank) { 1399 MPI_Status status; 1400 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1401 for (i=1; i<size; i++) { 1402 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1403 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1404 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1405 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1406 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1407 } 1408 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1409 } else { 1410 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1411 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1412 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1413 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1414 } 1415 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1416 1417 /* load up the local column values */ 1418 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1419 cnt = 0; 1420 for (i=0; i<mat->rmap->n; i++) { 1421 for (j=B->i[i]; j<B->i[i+1]; j++) { 1422 if ( garray[B->j[j]] > cstart) break; 1423 column_values[cnt++] = B->a[j]; 1424 } 1425 for (k=A->i[i]; k<A->i[i+1]; k++) { 1426 column_values[cnt++] = A->a[k]; 1427 } 1428 for (; j<B->i[i+1]; j++) { 1429 column_values[cnt++] = B->a[j]; 1430 } 1431 } 1432 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1433 1434 /* store the column values to the file */ 1435 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1436 if (!rank) { 1437 MPI_Status status; 1438 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1439 for (i=1; i<size; i++) { 1440 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1441 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1442 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1443 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1444 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1445 } 1446 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1447 } else { 1448 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1449 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1450 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1451 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1452 } 1453 ierr = PetscFree(column_values);CHKERRQ(ierr); 1454 1455 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1456 if (file) { 1457 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1458 } 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1465 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1466 { 1467 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1468 PetscErrorCode ierr; 1469 PetscMPIInt rank = aij->rank,size = aij->size; 1470 PetscBool isdraw,iascii,isbinary; 1471 PetscViewer sviewer; 1472 PetscViewerFormat format; 1473 1474 PetscFunctionBegin; 1475 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1476 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1477 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1478 if (iascii) { 1479 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1480 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1481 MatInfo info; 1482 PetscBool inodes; 1483 1484 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1485 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1486 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1488 if (!inodes) { 1489 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1490 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1491 } else { 1492 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1493 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1494 } 1495 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1497 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1498 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D 
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1499 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1500 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1501 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1502 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1505 PetscInt inodecount,inodelimit,*inodes; 1506 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1507 if (inodes) { 1508 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1509 } else { 1510 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1511 } 1512 PetscFunctionReturn(0); 1513 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1514 PetscFunctionReturn(0); 1515 } 1516 } else if (isbinary) { 1517 if (size == 1) { 1518 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1519 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1520 } else { 1521 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1522 } 1523 PetscFunctionReturn(0); 1524 } else if (isdraw) { 1525 PetscDraw draw; 1526 PetscBool isnull; 1527 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1528 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1529 } 1530 1531 if (size == 1) { 1532 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1533 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1534 } else { 1535 /* assemble the entire matrix onto first processor. 
*/ 1536 Mat A; 1537 Mat_SeqAIJ *Aloc; 1538 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1539 MatScalar *a; 1540 1541 if (mat->rmap->N > 1024) { 1542 PetscBool flg = PETSC_FALSE; 1543 1544 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1545 if (!flg) { 1546 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1547 } 1548 } 1549 1550 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1551 if (!rank) { 1552 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1553 } else { 1554 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1555 } 1556 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1557 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1558 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1559 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1560 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1561 1562 /* copy over the A part */ 1563 Aloc = (Mat_SeqAIJ*)aij->A->data; 1564 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1565 row = mat->rmap->rstart; 1566 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1567 for (i=0; i<m; i++) { 1568 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1569 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1570 } 1571 aj = Aloc->j; 1572 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1573 1574 /* copy over the B part */ 1575 Aloc = (Mat_SeqAIJ*)aij->B->data; 1576 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1577 row = mat->rmap->rstart; 1578 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1579 ct = cols; 1580 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1581 for (i=0; i<m; i++) { 1582 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1583 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1584 } 1585 ierr = PetscFree(ct);CHKERRQ(ierr); 1586 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1587 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1588 /* 1589 Everyone has to call to draw the matrix since the graphics waits are 1590 synchronized across all processors that share the PetscDraw object 1591 */ 1592 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1593 if (!rank) { 1594 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1595 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1596 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1597 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1598 } 1599 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1600 ierr = MatDestroy(&A);CHKERRQ(ierr); 1601 } 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatView_MPIAIJ" 1607 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1608 { 1609 PetscErrorCode ierr; 1610 PetscBool iascii,isdraw,issocket,isbinary; 1611 1612 PetscFunctionBegin; 1613 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1614 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1615 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1616 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1617 if (iascii || isdraw || isbinary || issocket) { 1618 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1619 } else { 1620 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatSOR_MPIAIJ" 1627 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1628 { 1629 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1630 PetscErrorCode ierr; 1631 Vec bb1 = 0; 1632 PetscBool hasop; 1633 1634 PetscFunctionBegin; 1635 if (flag == SOR_APPLY_UPPER) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1641 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1642 } 1643 1644 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1645 if (flag & SOR_ZERO_INITIAL_GUESS) { 1646 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1647 its--; 1648 } 1649 1650 while (its--) { 1651 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 1654 /* update rhs: bb1 = bb - B*x */ 1655 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1656 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1657 1658 /* local sweep */ 1659 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1660 } 1661 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1662 if (flag & SOR_ZERO_INITIAL_GUESS) { 1663 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1664 its--; 1665 } 1666 while (its--) { 1667 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 1670 /* update rhs: bb1 = bb - B*x */ 1671 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1672 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1673 1674 /* local sweep */ 1675 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1676 } 1677 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1678 if (flag & SOR_ZERO_INITIAL_GUESS) { 1679 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1680 its--; 1681 } 1682 while (its--) { 1683 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1684 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1685 1686 /* update rhs: bb1 = bb - B*x */ 1687 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1688 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1689 1690 /* local sweep */ 1691 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1692 } 1693 } else if (flag & SOR_EISENSTAT) { 1694 Vec xx1; 1695 1696 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1697 
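    /*
       Eisenstat's trick (SOR_EISENSTAT): the local backward sweep below produces an
       initial xx with a zero initial guess; xx is then scattered into mat->lvec, the
       right-hand side is modified to bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec
       (D being the block diagonal obtained via MatMultDiagonalBlock() or mat->diag),
       and a local forward sweep on bb1 yields the correction xx1 that is added to xx.
    */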
ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1698 1699 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1700 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1701 if (!mat->diag) { 1702 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1703 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1704 } 1705 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1706 if (hasop) { 1707 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1708 } else { 1709 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1710 } 1711 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1712 1713 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1714 1715 /* local sweep */ 1716 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1717 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1718 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1719 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1720 1721 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatPermute_MPIAIJ" 1727 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1728 { 1729 Mat aA,aB,Aperm; 1730 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1731 PetscScalar *aa,*ba; 1732 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1733 PetscSF rowsf,sf; 1734 IS parcolp = PETSC_NULL; 1735 PetscBool done; 1736 PetscErrorCode ierr; 1737 1738 PetscFunctionBegin; 1739 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1740 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1741 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1742 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1743 1744 /* Invert row permutation to find out where my rows should go */ 1745 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1748 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1749 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1750 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1751 1752 /* Invert column permutation to find out where my columns should go */ 1753 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1754 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1755 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1756 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1757 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1758 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1759 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1760 1761 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1762 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1763 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1764 1765 /* Find out where my gcols should go */ 1766 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1767 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1768 ierr = 
PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1769 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1770 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1771 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1772 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1773 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1774 1775 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1776 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1777 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1778 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1779 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1780 for (i=0; i<m; i++) { 1781 PetscInt row = rdest[i],rowner; 1782 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1783 for (j=ai[i]; j<ai[i+1]; j++) { 1784 PetscInt cowner,col = cdest[aj[j]]; 1785 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1786 if (rowner == cowner) dnnz[i]++; 1787 else onnz[i]++; 1788 } 1789 for (j=bi[i]; j<bi[i+1]; j++) { 1790 PetscInt cowner,col = gcdest[bj[j]]; 1791 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1792 if (rowner == cowner) dnnz[i]++; 1793 else onnz[i]++; 1794 } 1795 } 1796 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1797 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1798 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1799 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1800 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1802 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1803 1804 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1805 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1806 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1807 for (i=0; i<m; i++) { 1808 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1809 PetscInt rowlen; 1810 rowlen = ai[i+1] - ai[i]; 1811 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1812 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1813 rowlen = bi[i+1] - bi[i]; 1814 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1815 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1816 } 1817 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1818 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1820 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1821 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1822 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1823 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1824 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1825 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1826 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1827 *B = Aperm; 1828 PetscFunctionReturn(0); 1829 } 1830 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1833 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1834 { 1835 Mat_MPIAIJ 
*mat = (Mat_MPIAIJ*)matin->data; 1836 Mat A = mat->A,B = mat->B; 1837 PetscErrorCode ierr; 1838 PetscReal isend[5],irecv[5]; 1839 1840 PetscFunctionBegin; 1841 info->block_size = 1.0; 1842 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1843 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1844 isend[3] = info->memory; isend[4] = info->mallocs; 1845 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1846 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1847 isend[3] += info->memory; isend[4] += info->mallocs; 1848 if (flag == MAT_LOCAL) { 1849 info->nz_used = isend[0]; 1850 info->nz_allocated = isend[1]; 1851 info->nz_unneeded = isend[2]; 1852 info->memory = isend[3]; 1853 info->mallocs = isend[4]; 1854 } else if (flag == MAT_GLOBAL_MAX) { 1855 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1856 info->nz_used = irecv[0]; 1857 info->nz_allocated = irecv[1]; 1858 info->nz_unneeded = irecv[2]; 1859 info->memory = irecv[3]; 1860 info->mallocs = irecv[4]; 1861 } else if (flag == MAT_GLOBAL_SUM) { 1862 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1863 info->nz_used = irecv[0]; 1864 info->nz_allocated = irecv[1]; 1865 info->nz_unneeded = irecv[2]; 1866 info->memory = irecv[3]; 1867 info->mallocs = irecv[4]; 1868 } 1869 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1870 info->fill_ratio_needed = 0; 1871 info->factor_mallocs = 0; 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatSetOption_MPIAIJ" 1877 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1878 { 1879 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1880 PetscErrorCode ierr; 1881 1882 PetscFunctionBegin; 1883 switch (op) { 1884 case MAT_NEW_NONZERO_LOCATIONS: 1885 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1886 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1887 case MAT_KEEP_NONZERO_PATTERN: 1888 case MAT_NEW_NONZERO_LOCATION_ERR: 1889 case MAT_USE_INODES: 1890 case MAT_IGNORE_ZERO_ENTRIES: 1891 MatCheckPreallocated(A,1); 1892 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1893 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1894 break; 1895 case MAT_ROW_ORIENTED: 1896 a->roworiented = flg; 1897 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1898 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1899 break; 1900 case MAT_NEW_DIAGONALS: 1901 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1902 break; 1903 case MAT_IGNORE_OFF_PROC_ENTRIES: 1904 a->donotstash = flg; 1905 break; 1906 case MAT_SPD: 1907 A->spd_set = PETSC_TRUE; 1908 A->spd = flg; 1909 if (flg) { 1910 A->symmetric = PETSC_TRUE; 1911 A->structurally_symmetric = PETSC_TRUE; 1912 A->symmetric_set = PETSC_TRUE; 1913 A->structurally_symmetric_set = PETSC_TRUE; 1914 } 1915 break; 1916 case MAT_SYMMETRIC: 1917 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1918 break; 1919 case MAT_STRUCTURALLY_SYMMETRIC: 1920 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1921 break; 1922 case MAT_HERMITIAN: 1923 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1924 break; 1925 case MAT_SYMMETRY_ETERNAL: 1926 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1927 break; 1928 default: 1929 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1930 } 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatGetRow_MPIAIJ" 1936 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt 
**idx,PetscScalar **v) 1937 { 1938 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1939 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1940 PetscErrorCode ierr; 1941 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1942 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1943 PetscInt *cmap,*idx_p; 1944 1945 PetscFunctionBegin; 1946 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1947 mat->getrowactive = PETSC_TRUE; 1948 1949 if (!mat->rowvalues && (idx || v)) { 1950 /* 1951 allocate enough space to hold information from the longest row. 1952 */ 1953 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1954 PetscInt max = 1,tmp; 1955 for (i=0; i<matin->rmap->n; i++) { 1956 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1957 if (max < tmp) { max = tmp; } 1958 } 1959 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1960 } 1961 1962 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1963 lrow = row - rstart; 1964 1965 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1966 if (!v) {pvA = 0; pvB = 0;} 1967 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1968 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1969 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1970 nztot = nzA + nzB; 1971 1972 cmap = mat->garray; 1973 if (v || idx) { 1974 if (nztot) { 1975 /* Sort by increasing column numbers, assuming A and B already sorted */ 1976 PetscInt imark = -1; 1977 if (v) { 1978 *v = v_p = mat->rowvalues; 1979 for (i=0; i<nzB; i++) { 1980 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1981 else break; 1982 } 1983 imark = i; 1984 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1985 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1986 } 1987 if (idx) { 1988 *idx = idx_p = mat->rowindices; 1989 if (imark > -1) { 1990 for (i=0; i<imark; i++) { 1991 idx_p[i] = cmap[cworkB[i]]; 1992 } 1993 } else { 1994 for (i=0; i<nzB; i++) { 1995 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1996 else break; 1997 } 1998 imark = i; 1999 } 2000 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 2001 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 2002 } 2003 } else { 2004 if (idx) *idx = 0; 2005 if (v) *v = 0; 2006 } 2007 } 2008 *nz = nztot; 2009 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2010 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 #undef __FUNCT__ 2015 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2016 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2017 { 2018 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2019 2020 PetscFunctionBegin; 2021 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2022 aij->getrowactive = PETSC_FALSE; 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatNorm_MPIAIJ" 2028 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2029 { 2030 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2031 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2032 PetscErrorCode ierr; 2033 PetscInt i,j,cstart = mat->cmap->rstart; 2034 PetscReal sum = 0.0; 2035 MatScalar *v; 2036 2037 PetscFunctionBegin; 2038 if (aij->size == 1) { 2039 ierr 
= MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2040 } else { 2041 if (type == NORM_FROBENIUS) { 2042 v = amat->a; 2043 for (i=0; i<amat->nz; i++) { 2044 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2045 } 2046 v = bmat->a; 2047 for (i=0; i<bmat->nz; i++) { 2048 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2049 } 2050 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2051 *norm = PetscSqrtReal(*norm); 2052 } else if (type == NORM_1) { /* max column norm */ 2053 PetscReal *tmp,*tmp2; 2054 PetscInt *jj,*garray = aij->garray; 2055 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2056 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2057 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2058 *norm = 0.0; 2059 v = amat->a; jj = amat->j; 2060 for (j=0; j<amat->nz; j++) { 2061 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a; jj = bmat->j; 2064 for (j=0; j<bmat->nz; j++) { 2065 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2066 } 2067 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2068 for (j=0; j<mat->cmap->N; j++) { 2069 if (tmp2[j] > *norm) *norm = tmp2[j]; 2070 } 2071 ierr = PetscFree(tmp);CHKERRQ(ierr); 2072 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2073 } else if (type == NORM_INFINITY) { /* max row norm */ 2074 PetscReal ntemp = 0.0; 2075 for (j=0; j<aij->A->rmap->n; j++) { 2076 v = amat->a + amat->i[j]; 2077 sum = 0.0; 2078 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2079 sum += PetscAbsScalar(*v); v++; 2080 } 2081 v = bmat->a + bmat->i[j]; 2082 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2083 sum += PetscAbsScalar(*v); v++; 2084 } 2085 if (sum > ntemp) ntemp = sum; 2086 } 2087 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2088 } else { 2089 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2090 } 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatTranspose_MPIAIJ" 2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2098 { 2099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2100 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2101 PetscErrorCode ierr; 2102 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2103 PetscInt cstart=A->cmap->rstart,ncol; 2104 Mat B; 2105 MatScalar *array; 2106 2107 PetscFunctionBegin; 2108 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2109 2110 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2111 ai = Aloc->i; aj = Aloc->j; 2112 bi = Bloc->i; bj = Bloc->j; 2113 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2114 PetscInt *d_nnz,*g_nnz,*o_nnz; 2115 PetscSFNode *oloc; 2116 PETSC_UNUSED PetscSF sf; 2117 2118 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2119 /* compute d_nnz for preallocation */ 2120 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2121 for (i=0; i<ai[ma]; i++) { 2122 d_nnz[aj[i]] ++; 2123 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2124 } 2125 /* compute local off-diagonal contributions */ 2126 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2127 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2128 /* map those to global */ 2129 
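    /*
       A star forest over A's column layout reduces (sums) the ghost-column counts in
       g_nnz onto the processes owning those global columns, producing o_nnz; together
       with d_nnz this gives the preallocation for the transpose before its values are
       inserted below.
    */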
ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2130 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2131 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2132 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2133 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2134 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2135 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2136 2137 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2138 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2139 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2140 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2141 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2142 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2143 } else { 2144 B = *matout; 2145 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2146 for (i=0; i<ai[ma]; i++) { 2147 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2148 } 2149 } 2150 2151 /* copy over the A part */ 2152 array = Aloc->a; 2153 row = A->rmap->rstart; 2154 for (i=0; i<ma; i++) { 2155 ncol = ai[i+1]-ai[i]; 2156 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2157 row++; array += ncol; aj += ncol; 2158 } 2159 aj = Aloc->j; 2160 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2161 2162 /* copy over the B part */ 2163 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2164 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2165 array = Bloc->a; 2166 row = A->rmap->rstart; 2167 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2168 cols_tmp = cols; 2169 for (i=0; i<mb; i++) { 2170 ncol = bi[i+1]-bi[i]; 2171 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2172 row++; array += ncol; cols_tmp += ncol; 2173 } 2174 ierr = PetscFree(cols);CHKERRQ(ierr); 2175 2176 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2179 *matout = B; 2180 } else { 2181 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2182 } 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2188 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2189 { 2190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2191 Mat a = aij->A,b = aij->B; 2192 PetscErrorCode ierr; 2193 PetscInt s1,s2,s3; 2194 2195 PetscFunctionBegin; 2196 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2197 if (rr) { 2198 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2199 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2200 /* Overlap communication with computation. 
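       The scatter of rr into aij->lvec is started just below; the left scaling of the
       off-diagonal block and the scaling of the diagonal block proceed while it is in
       flight, and the scatter is completed before the off-diagonal block is
       right-scaled with lvec.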
*/ 2201 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2202 } 2203 if (ll) { 2204 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2205 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2206 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2207 } 2208 /* scale the diagonal block */ 2209 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2210 2211 if (rr) { 2212 /* Do a scatter end and then right scale the off-diagonal block */ 2213 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2214 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2215 } 2216 2217 PetscFunctionReturn(0); 2218 } 2219 2220 #undef __FUNCT__ 2221 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2222 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2223 { 2224 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2225 PetscErrorCode ierr; 2226 2227 PetscFunctionBegin; 2228 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatEqual_MPIAIJ" 2234 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2235 { 2236 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2237 Mat a,b,c,d; 2238 PetscBool flg; 2239 PetscErrorCode ierr; 2240 2241 PetscFunctionBegin; 2242 a = matA->A; b = matA->B; 2243 c = matB->A; d = matB->B; 2244 2245 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2246 if (flg) { 2247 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2248 } 2249 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatCopy_MPIAIJ" 2255 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2256 { 2257 PetscErrorCode ierr; 2258 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2259 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2260 2261 PetscFunctionBegin; 2262 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2263 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2264 /* because of the column compression in the off-processor part of the matrix a->B, 2265 the number of columns in a->B and b->B may be different, hence we cannot call 2266 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2267 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2268 then copying the submatrices */ 2269 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2270 } else { 2271 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2272 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2273 } 2274 PetscFunctionReturn(0); 2275 } 2276 2277 #undef __FUNCT__ 2278 #define __FUNCT__ "MatSetUp_MPIAIJ" 2279 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2280 { 2281 PetscErrorCode ierr; 2282 2283 PetscFunctionBegin; 2284 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2290 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2291 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2292 { 2293 PetscInt i,m=Y->rmap->N; 2294 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2295 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2296 const PetscInt *xi = x->i,*yi = y->i; 2297 2298 PetscFunctionBegin; 2299 /* Set the number of nonzeros in the new matrix */ 2300 for (i=0; i<m; i++) { 2301 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2302 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2303 nnz[i] = 0; 2304 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2305 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2306 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2307 nnz[i]++; 2308 } 2309 for (; k<nzy; k++) nnz[i]++; 2310 } 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatAXPY_MPIAIJ" 2316 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2317 { 2318 PetscErrorCode ierr; 2319 PetscInt i; 2320 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2321 PetscBLASInt bnz,one=1; 2322 Mat_SeqAIJ *x,*y; 2323 2324 PetscFunctionBegin; 2325 if (str == SAME_NONZERO_PATTERN) { 2326 PetscScalar alpha = a; 2327 x = (Mat_SeqAIJ *)xx->A->data; 2328 y = (Mat_SeqAIJ *)yy->A->data; 2329 bnz = PetscBLASIntCast(x->nz); 2330 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2331 x = (Mat_SeqAIJ *)xx->B->data; 2332 y = (Mat_SeqAIJ *)yy->B->data; 2333 bnz = PetscBLASIntCast(x->nz); 2334 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2335 } else if (str == SUBSET_NONZERO_PATTERN) { 2336 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2337 2338 x = (Mat_SeqAIJ *)xx->B->data; 2339 y = (Mat_SeqAIJ *)yy->B->data; 2340 if (y->xtoy && y->XtoY != xx->B) { 2341 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2342 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2343 } 2344 if (!y->xtoy) { /* get xtoy */ 2345 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2346 y->XtoY = xx->B; 2347 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2348 } 2349 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2350 } else { 2351 Mat B; 2352 PetscInt *nnz_d,*nnz_o; 2353 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2354 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2355 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2356 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2357 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2358 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2359 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2360 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2361 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2362 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2363 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2364 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2365 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2366 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2367 } 2368 PetscFunctionReturn(0); 2369 } 2370 2371 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatConjugate_MPIAIJ" 2375 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2376 { 2377 #if defined(PETSC_USE_COMPLEX) 2378 PetscErrorCode ierr; 2379 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2380 2381 PetscFunctionBegin; 2382 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2383 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2384 #else 2385 PetscFunctionBegin; 2386 #endif 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatRealPart_MPIAIJ" 2392 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2393 { 2394 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2395 PetscErrorCode ierr; 2396 2397 PetscFunctionBegin; 2398 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2399 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2400 PetscFunctionReturn(0); 2401 } 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2405 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2406 { 2407 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2408 PetscErrorCode ierr; 2409 2410 PetscFunctionBegin; 2411 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2412 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 #ifdef PETSC_HAVE_PBGL 2417 2418 #include <boost/parallel/mpi/bsp_process_group.hpp> 2419 #include <boost/graph/distributed/ilu_default_graph.hpp> 2420 #include <boost/graph/distributed/ilu_0_block.hpp> 2421 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2422 #include <boost/graph/distributed/petsc/interface.hpp> 2423 #include <boost/multi_array.hpp> 2424 #include <boost/parallel/distributed_property_map->hpp> 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2428 /* 2429 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2430 */ 2431 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2432 { 2433 namespace petsc = boost::distributed::petsc; 2434 2435 namespace graph_dist = boost::graph::distributed; 2436 using boost::graph::distributed::ilu_default::process_group_type; 2437 using boost::graph::ilu_permuted; 2438 2439 PetscBool row_identity, col_identity; 2440 PetscContainer c; 2441 PetscInt m, n, M, N; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2446 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2447 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2448 if (!row_identity || !col_identity) { 2449 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2450 } 2451 2452 process_group_type pg; 2453 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2454 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2455 lgraph_type& level_graph = *lgraph_p; 2456 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2457 2458 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2459 ilu_permuted(level_graph); 2460 2461 /* put together the new matrix */ 2462 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2463 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2464 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2465 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2466 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2467 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2468 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2469 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2470 2471 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2472 ierr = PetscContainerSetPointer(c, lgraph_p); 2473 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2474 ierr = PetscContainerDestroy(&c); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2480 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2481 { 2482 PetscFunctionBegin; 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatSolve_MPIAIJ" 2488 /* 2489 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2490 */ 2491 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2492 { 2493 namespace graph_dist = boost::graph::distributed; 2494 2495 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2496 lgraph_type* lgraph_p; 2497 PetscContainer c; 2498 PetscErrorCode ierr; 2499 2500 PetscFunctionBegin; 2501 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2502 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2503 ierr = VecCopy(b, x);CHKERRQ(ierr); 2504 2505 PetscScalar* array_x; 2506 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2507 PetscInt sx; 2508 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2509 2510 PetscScalar* array_b; 2511 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2512 PetscInt sb; 2513 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2514 2515 lgraph_type& level_graph = *lgraph_p; 2516 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2517 2518 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2519 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2520 ref_x(array_x, boost::extents[num_vertices(graph)]); 2521 2522 typedef boost::iterator_property_map<array_ref_type::iterator, 2523 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2524 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2525 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2526 2527 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2528 2529 PetscFunctionReturn(0); 2530 } 2531 #endif 2532 2533 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2534 PetscInt nzlocal,nsends,nrecvs; 2535 PetscMPIInt *send_rank,*recv_rank; 2536 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2537 PetscScalar *sbuf_a,**rbuf_a; 2538 PetscErrorCode (*Destroy)(Mat); 2539 } Mat_Redundant; 2540 2541 #undef __FUNCT__ 2542 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2543 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2544 { 2545 PetscErrorCode ierr; 2546 Mat_Redundant 
*redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++) {
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  A->ops->destroy = redund->Destroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  if (A->ops->destroy) {
    ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank;
    recv_rank = redund->recv_rank;
    sbuf_nz   = redund->sbuf_nz;
    rbuf_nz   = redund->rbuf_nz;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMPIInt subrank,subsize;
    PetscInt    nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++) { /* i = rank */
      if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover) { /* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover) {
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++) {
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX) {
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++) {
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++) {
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr    = sbuf_j;
    vals    = sbuf_a;
    rptr[0] = 0;
    for (i=0;
i<rend-rstart; i++) { 2717 row = i + rstart; 2718 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2719 ncols = nzA + nzB; 2720 cworkB = b->j + b->i[i]; 2721 aworkA = a->a + a->i[i]; 2722 aworkB = b->a + b->i[i]; 2723 lwrite = 0; 2724 for (l=0; l<nzB; l++) { 2725 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2726 } 2727 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2728 for (l=0; l<nzB; l++) { 2729 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2730 } 2731 vals += ncols; 2732 rptr[i+1] = rptr[i] + ncols; 2733 } 2734 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2735 2736 /* send nzlocal to others, and recv other's nzlocal */ 2737 /*--------------------------------------------------*/ 2738 if (reuse == MAT_INITIAL_MATRIX) { 2739 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2740 s_waits2 = s_waits3 + nsends; 2741 s_waits1 = s_waits2 + nsends; 2742 r_waits1 = s_waits1 + nsends; 2743 r_waits2 = r_waits1 + nrecvs; 2744 r_waits3 = r_waits2 + nrecvs; 2745 } else { 2746 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2747 r_waits3 = s_waits3 + nsends; 2748 } 2749 2750 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2751 if (reuse == MAT_INITIAL_MATRIX) { 2752 /* get new tags to keep the communication clean */ 2753 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2754 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2755 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2756 2757 /* post receives of other's nzlocal */ 2758 for (i=0; i<nrecvs; i++) { 2759 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2760 } 2761 /* send nzlocal to others */ 2762 for (i=0; i<nsends; i++) { 2763 sbuf_nz[i] = nzlocal; 2764 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2765 } 2766 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2767 count = nrecvs; 2768 while (count) { 2769 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2770 recv_rank[imdex] = recv_status.MPI_SOURCE; 2771 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2772 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2773 2774 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2775 rbuf_nz[imdex] += i + 2; 2776 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2777 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2778 count--; 2779 } 2780 /* wait on sends of nzlocal */ 2781 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2782 /* send mat->i,j to others, and recv from other's */ 2783 /*------------------------------------------------*/ 2784 for (i=0; i<nsends; i++) { 2785 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2786 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2787 } 2788 /* wait on receives of mat->i,j */ 2789 /*------------------------------*/ 2790 count = nrecvs; 2791 while (count) { 2792 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2793 if (recv_rank[imdex] != recv_status.MPI_SOURCE) 
SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2794 count--; 2795 } 2796 /* wait on sends of mat->i,j */ 2797 /*---------------------------*/ 2798 if (nsends) { 2799 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2800 } 2801 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2802 2803 /* post receives, send and receive mat->a */ 2804 /*----------------------------------------*/ 2805 for (imdex=0; imdex<nrecvs; imdex++) { 2806 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2807 } 2808 for (i=0; i<nsends; i++) { 2809 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2810 } 2811 count = nrecvs; 2812 while (count) { 2813 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2814 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2815 count--; 2816 } 2817 if (nsends) { 2818 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2819 } 2820 2821 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2822 2823 /* create redundant matrix */ 2824 /*-------------------------*/ 2825 if (reuse == MAT_INITIAL_MATRIX) { 2826 /* compute rownz_max for preallocation */ 2827 for (imdex=0; imdex<nrecvs; imdex++) { 2828 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2829 rptr = rbuf_j[imdex]; 2830 for (i=0; i<j; i++) { 2831 ncols = rptr[i+1] - rptr[i]; 2832 if (rownz_max < ncols) rownz_max = ncols; 2833 } 2834 } 2835 2836 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2837 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2838 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2839 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2840 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2841 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2842 } else { 2843 C = *matredundant; 2844 } 2845 2846 /* insert local matrix entries */ 2847 rptr = sbuf_j; 2848 cols = sbuf_j + rend-rstart + 1; 2849 vals = sbuf_a; 2850 for (i=0; i<rend-rstart; i++) { 2851 row = i + rstart; 2852 ncols = rptr[i+1] - rptr[i]; 2853 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2854 vals += ncols; 2855 cols += ncols; 2856 } 2857 /* insert received matrix entries */ 2858 for (imdex=0; imdex<nrecvs; imdex++) { 2859 rstart = rowrange[recv_rank[imdex]]; 2860 rend = rowrange[recv_rank[imdex]+1]; 2861 rptr = rbuf_j[imdex]; 2862 cols = rbuf_j[imdex] + rend-rstart + 1; 2863 vals = rbuf_a[imdex]; 2864 for (i=0; i<rend-rstart; i++) { 2865 row = i + rstart; 2866 ncols = rptr[i+1] - rptr[i]; 2867 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2868 vals += ncols; 2869 cols += ncols; 2870 } 2871 } 2872 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2873 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2874 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2875 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2876 if (reuse == MAT_INITIAL_MATRIX) { 2877 PetscContainer container; 2878 *matredundant = C; 2879 /* create a supporting struct and attach it to C for reuse */ 2880 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2881 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2882 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2883 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2884 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2885 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2886 2887 redund->nzlocal = nzlocal; 2888 redund->nsends = nsends; 2889 redund->nrecvs = nrecvs; 2890 redund->send_rank = send_rank; 2891 redund->recv_rank = recv_rank; 2892 redund->sbuf_nz = sbuf_nz; 2893 redund->rbuf_nz = rbuf_nz; 2894 redund->sbuf_j = sbuf_j; 2895 redund->sbuf_a = sbuf_a; 2896 redund->rbuf_j = rbuf_j; 2897 redund->rbuf_a = rbuf_a; 2898 2899 redund->Destroy = C->ops->destroy; 2900 C->ops->destroy = MatDestroy_MatRedundant; 2901 } 2902 PetscFunctionReturn(0); 2903 } 2904 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2907 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2908 { 2909 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2910 PetscErrorCode ierr; 2911 PetscInt i,*idxb = 0; 2912 PetscScalar *va,*vb; 2913 Vec vtmp; 2914 2915 PetscFunctionBegin; 2916 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2917 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2918 if (idx) { 2919 for (i=0; i<A->rmap->n; i++) { 2920 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2921 } 2922 } 2923 2924 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2925 if (idx) { 2926 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2927 } 2928 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2929 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2930 2931 for (i=0; i<A->rmap->n; i++) { 2932 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2933 va[i] = vb[i]; 2934 if (idx) idx[i] = a->garray[idxb[i]]; 2935 } 2936 } 2937 2938 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2939 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2940 ierr = PetscFree(idxb);CHKERRQ(ierr); 2941 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2942 PetscFunctionReturn(0); 2943 } 2944 2945 #undef __FUNCT__ 2946 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2947 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2948 { 2949 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2950 PetscErrorCode ierr; 2951 PetscInt i,*idxb = 0; 2952 PetscScalar *va,*vb; 2953 Vec vtmp; 2954 2955 PetscFunctionBegin; 2956 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2957 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2958 if (idx) { 2959 for (i=0; i<A->cmap->n; i++) { 2960 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2961 } 2962 } 2963 2964 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2965 if (idx) { 2966 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2967 } 2968 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2969 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2970 2971 for (i=0; i<A->rmap->n; i++) { 2972 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2973 va[i] = vb[i]; 2974 if (idx) idx[i] = a->garray[idxb[i]]; 2975 } 2976 } 2977 2978 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2979 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2980 ierr = PetscFree(idxb);CHKERRQ(ierr); 2981 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2987 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2988 { 2989 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) 
A->data; 2990 PetscInt n = A->rmap->n; 2991 PetscInt cstart = A->cmap->rstart; 2992 PetscInt *cmap = mat->garray; 2993 PetscInt *diagIdx, *offdiagIdx; 2994 Vec diagV, offdiagV; 2995 PetscScalar *a, *diagA, *offdiagA; 2996 PetscInt r; 2997 PetscErrorCode ierr; 2998 2999 PetscFunctionBegin; 3000 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3001 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3002 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3003 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3004 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3005 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3006 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3007 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3008 for (r = 0; r < n; ++r) { 3009 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3010 a[r] = diagA[r]; 3011 idx[r] = cstart + diagIdx[r]; 3012 } else { 3013 a[r] = offdiagA[r]; 3014 idx[r] = cmap[offdiagIdx[r]]; 3015 } 3016 } 3017 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3018 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3019 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3020 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3021 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3022 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 3026 #undef __FUNCT__ 3027 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3028 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3029 { 3030 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3031 PetscInt n = A->rmap->n; 3032 PetscInt cstart = A->cmap->rstart; 3033 PetscInt *cmap = mat->garray; 3034 PetscInt *diagIdx, *offdiagIdx; 3035 Vec diagV, offdiagV; 3036 PetscScalar *a, *diagA, *offdiagA; 3037 PetscInt r; 3038 PetscErrorCode ierr; 3039 3040 PetscFunctionBegin; 3041 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3042 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3043 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3044 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3045 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3046 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3047 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3048 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3049 for (r = 0; r < n; ++r) { 3050 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3051 a[r] = diagA[r]; 3052 idx[r] = cstart + diagIdx[r]; 3053 } else { 3054 a[r] = offdiagA[r]; 3055 idx[r] = cmap[offdiagIdx[r]]; 3056 } 3057 } 3058 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3059 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3060 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3061 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3062 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3063 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3064 PetscFunctionReturn(0); 3065 } 3066 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3069 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3070 { 3071 PetscErrorCode ierr; 3072 Mat *dummy; 3073 3074 PetscFunctionBegin; 3075 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3076 *newmat = *dummy; 3077 ierr = PetscFree(dummy);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3082 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3085 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3086 { 3087 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3088 PetscErrorCode ierr; 3089 3090 PetscFunctionBegin; 3091 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3092 PetscFunctionReturn(0); 3093 } 3094 3095 #undef __FUNCT__ 3096 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3097 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3098 { 3099 PetscErrorCode ierr; 3100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3101 3102 PetscFunctionBegin; 3103 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3104 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3105 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3106 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3107 PetscFunctionReturn(0); 3108 } 3109 3110 /* -------------------------------------------------------------------*/ 3111 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3112 MatGetRow_MPIAIJ, 3113 MatRestoreRow_MPIAIJ, 3114 MatMult_MPIAIJ, 3115 /* 4*/ MatMultAdd_MPIAIJ, 3116 MatMultTranspose_MPIAIJ, 3117 MatMultTransposeAdd_MPIAIJ, 3118 #ifdef PETSC_HAVE_PBGL 3119 MatSolve_MPIAIJ, 3120 #else 3121 0, 3122 #endif 3123 0, 3124 0, 3125 /*10*/ 0, 3126 0, 3127 0, 3128 MatSOR_MPIAIJ, 3129 MatTranspose_MPIAIJ, 3130 /*15*/ MatGetInfo_MPIAIJ, 3131 MatEqual_MPIAIJ, 3132 MatGetDiagonal_MPIAIJ, 3133 MatDiagonalScale_MPIAIJ, 3134 MatNorm_MPIAIJ, 3135 /*20*/ MatAssemblyBegin_MPIAIJ, 3136 MatAssemblyEnd_MPIAIJ, 3137 MatSetOption_MPIAIJ, 3138 MatZeroEntries_MPIAIJ, 3139 /*24*/ MatZeroRows_MPIAIJ, 3140 0, 3141 #ifdef PETSC_HAVE_PBGL 3142 0, 3143 #else 3144 0, 3145 #endif 3146 0, 3147 0, 3148 /*29*/ MatSetUp_MPIAIJ, 3149 #ifdef PETSC_HAVE_PBGL 3150 0, 3151 #else 3152 0, 3153 #endif 3154 0, 3155 0, 3156 0, 3157 /*34*/ MatDuplicate_MPIAIJ, 3158 0, 3159 0, 3160 0, 3161 0, 3162 /*39*/ MatAXPY_MPIAIJ, 3163 MatGetSubMatrices_MPIAIJ, 3164 MatIncreaseOverlap_MPIAIJ, 3165 MatGetValues_MPIAIJ, 3166 MatCopy_MPIAIJ, 3167 /*44*/ MatGetRowMax_MPIAIJ, 3168 MatScale_MPIAIJ, 3169 0, 3170 0, 3171 MatZeroRowsColumns_MPIAIJ, 3172 /*49*/ MatSetRandom_MPIAIJ, 3173 0, 3174 0, 3175 0, 3176 0, 3177 /*54*/ MatFDColoringCreate_MPIAIJ, 3178 0, 3179 MatSetUnfactored_MPIAIJ, 3180 MatPermute_MPIAIJ, 3181 0, 3182 /*59*/ MatGetSubMatrix_MPIAIJ, 3183 MatDestroy_MPIAIJ, 3184 MatView_MPIAIJ, 3185 0, 3186 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3187 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3188 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3189 0, 3190 0, 3191 0, 3192 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3193 MatGetRowMinAbs_MPIAIJ, 3194 0, 3195 MatSetColoring_MPIAIJ, 3196 0, 3197 MatSetValuesAdifor_MPIAIJ, 3198 /*75*/ MatFDColoringApply_AIJ, 3199 0, 3200 0, 3201 0, 3202 MatFindZeroDiagonals_MPIAIJ, 3203 /*80*/ 0, 3204 0, 3205 0, 3206 /*83*/ MatLoad_MPIAIJ, 3207 0, 3208 0, 3209 0, 3210 0, 3211 0, 3212 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3213 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3214 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3215 MatPtAP_MPIAIJ_MPIAIJ, 3216 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3217 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3218 0, 3219 0, 3220 0, 3221 0, 3222 /*99*/ 0, 3223 0, 3224 0, 3225 MatConjugate_MPIAIJ, 3226 0, 3227 /*104*/MatSetValuesRow_MPIAIJ, 3228 MatRealPart_MPIAIJ, 3229 MatImaginaryPart_MPIAIJ, 3230 0, 3231 0, 3232 /*109*/0, 3233 MatGetRedundantMatrix_MPIAIJ, 3234 MatGetRowMin_MPIAIJ, 3235 0, 3236 0, 3237 
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3238 0, 3239 0, 3240 0, 3241 0, 3242 /*119*/0, 3243 0, 3244 0, 3245 0, 3246 MatGetMultiProcBlock_MPIAIJ, 3247 /*124*/MatFindNonzeroRows_MPIAIJ, 3248 MatGetColumnNorms_MPIAIJ, 3249 MatInvertBlockDiagonal_MPIAIJ, 3250 0, 3251 MatGetSubMatricesParallel_MPIAIJ, 3252 /*129*/0, 3253 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3254 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3255 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3256 0, 3257 /*134*/0, 3258 0, 3259 0, 3260 0, 3261 0 3262 }; 3263 3264 /* ----------------------------------------------------------------------------------------*/ 3265 3266 EXTERN_C_BEGIN 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3269 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3270 { 3271 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3272 PetscErrorCode ierr; 3273 3274 PetscFunctionBegin; 3275 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3276 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3277 PetscFunctionReturn(0); 3278 } 3279 EXTERN_C_END 3280 3281 EXTERN_C_BEGIN 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3284 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3285 { 3286 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3287 PetscErrorCode ierr; 3288 3289 PetscFunctionBegin; 3290 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3291 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3292 PetscFunctionReturn(0); 3293 } 3294 EXTERN_C_END 3295 3296 EXTERN_C_BEGIN 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3299 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3300 { 3301 Mat_MPIAIJ *b; 3302 PetscErrorCode ierr; 3303 PetscInt i; 3304 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3305 3306 PetscFunctionBegin; 3307 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3308 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3309 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3310 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3311 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3312 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3313 3314 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3315 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3316 if (d_nnz) { 3317 for (i=0; i<B->rmap->n; i++) { 3318 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3319 } 3320 } 3321 if (o_nnz) { 3322 for (i=0; i<B->rmap->n; i++) { 3323 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3324 } 3325 } 3326 b = (Mat_MPIAIJ*)B->data; 3327 3328 if (!B->preallocated) { 3329 /* Explicitly create 2 MATSEQAIJ matrices. 
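       b->A will hold the "diagonal" block (B->rmap->n by B->cmap->n, the local rows and local columns)
       and b->B will hold the "off-diagonal" block (B->rmap->n by B->cmap->N); this is the DIAGONAL /
       OFF-DIAGONAL split described in the MatMPIAIJSetPreallocation() documentation below.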
*/ 3330 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3331 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3332 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3333 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3334 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3335 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3336 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3337 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3338 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3339 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3340 } 3341 3342 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3343 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3344 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3345 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3346 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3347 B->preallocated = PETSC_TRUE; 3348 PetscFunctionReturn(0); 3349 } 3350 EXTERN_C_END 3351 3352 #undef __FUNCT__ 3353 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3354 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3355 { 3356 Mat mat; 3357 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 *newmat = 0; 3362 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3363 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3364 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3365 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3366 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3367 a = (Mat_MPIAIJ*)mat->data; 3368 3369 mat->factortype = matin->factortype; 3370 mat->rmap->bs = matin->rmap->bs; 3371 mat->cmap->bs = matin->cmap->bs; 3372 mat->assembled = PETSC_TRUE; 3373 mat->insertmode = NOT_SET_VALUES; 3374 mat->preallocated = PETSC_TRUE; 3375 3376 a->size = oldmat->size; 3377 a->rank = oldmat->rank; 3378 a->donotstash = oldmat->donotstash; 3379 a->roworiented = oldmat->roworiented; 3380 a->rowindices = 0; 3381 a->rowvalues = 0; 3382 a->getrowactive = PETSC_FALSE; 3383 3384 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3385 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3386 3387 if (oldmat->colmap) { 3388 #if defined (PETSC_USE_CTABLE) 3389 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3390 #else 3391 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3392 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3393 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3394 #endif 3395 } else a->colmap = 0; 3396 if (oldmat->garray) { 3397 PetscInt len; 3398 len = oldmat->B->cmap->n; 3399 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3400 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3401 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3402 } else a->garray = 0; 3403 3404 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3405 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3406 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3407 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3408 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3409 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3410 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3411 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3412 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3413 *newmat = mat; 3414 PetscFunctionReturn(0); 3415 } 3416 3417 3418 3419 #undef __FUNCT__ 3420 #define __FUNCT__ "MatLoad_MPIAIJ" 3421 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3422 { 3423 PetscScalar *vals,*svals; 3424 MPI_Comm comm = ((PetscObject)viewer)->comm; 3425 PetscErrorCode ierr; 3426 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3427 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3428 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3429 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3430 PetscInt cend,cstart,n,*rowners,sizesset=1; 3431 int fd; 3432 PetscInt bs = 1; 3433 3434 PetscFunctionBegin; 3435 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3436 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3437 if (!rank) { 3438 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3439 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3440 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3441 } 3442 3443 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3444 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3445 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3446 3447 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3448 3449 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3450 M = header[1]; N = header[2]; 3451 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3452 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3453 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3454 3455 /* If global sizes are set, check if they are consistent with that given in the file */ 3456 if (sizesset) { 3457 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3458 } 3459 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3460 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3461 3462 /* determine ownership of all (block) rows */ 3463 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3464 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3465 else m = newMat->rmap->n; /* Set by user */ 3466 3467 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3468 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3469 3470 /* First process needs enough room for process with most rows */ 3471 if (!rank) { 3472 mmax = rowners[1]; 3473 for (i=2; i<=size; i++) 
{ 3474 mmax = PetscMax(mmax, rowners[i]); 3475 } 3476 } else mmax = m; 3477 3478 rowners[0] = 0; 3479 for (i=2; i<=size; i++) { 3480 rowners[i] += rowners[i-1]; 3481 } 3482 rstart = rowners[rank]; 3483 rend = rowners[rank+1]; 3484 3485 /* distribute row lengths to all processors */ 3486 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3487 if (!rank) { 3488 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3489 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3490 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3491 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3492 for (j=0; j<m; j++) { 3493 procsnz[0] += ourlens[j]; 3494 } 3495 for (i=1; i<size; i++) { 3496 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3497 /* calculate the number of nonzeros on each processor */ 3498 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3499 procsnz[i] += rowlengths[j]; 3500 } 3501 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3502 } 3503 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3504 } else { 3505 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3506 } 3507 3508 if (!rank) { 3509 /* determine max buffer needed and allocate it */ 3510 maxnz = 0; 3511 for (i=0; i<size; i++) { 3512 maxnz = PetscMax(maxnz,procsnz[i]); 3513 } 3514 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3515 3516 /* read in my part of the matrix column indices */ 3517 nz = procsnz[0]; 3518 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3519 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3520 3521 /* read in every one elses and ship off */ 3522 for (i=1; i<size; i++) { 3523 nz = procsnz[i]; 3524 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3525 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3526 } 3527 ierr = PetscFree(cols);CHKERRQ(ierr); 3528 } else { 3529 /* determine buffer space needed for message */ 3530 nz = 0; 3531 for (i=0; i<m; i++) { 3532 nz += ourlens[i]; 3533 } 3534 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3535 3536 /* receive message of column indices*/ 3537 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3538 } 3539 3540 /* determine column ownership if matrix is not square */ 3541 if (N != M) { 3542 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3543 else n = newMat->cmap->n; 3544 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3545 cstart = cend - n; 3546 } else { 3547 cstart = rstart; 3548 cend = rend; 3549 n = cend - cstart; 3550 } 3551 3552 /* loop over local rows, determining number of off diagonal entries */ 3553 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3554 jj = 0; 3555 for (i=0; i<m; i++) { 3556 for (j=0; j<ourlens[i]; j++) { 3557 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3558 jj++; 3559 } 3560 } 3561 3562 for (i=0; i<m; i++) { 3563 ourlens[i] -= offlens[i]; 3564 } 3565 if (!sizesset) { 3566 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3567 } 3568 3569 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3570 3571 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3572 3573 for (i=0; i<m; i++) { 3574 ourlens[i] += offlens[i]; 3575 } 3576 3577 if (!rank) { 3578 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3579 3580 /* read in my part of the matrix numerical 
values */ 3581 nz = procsnz[0]; 3582 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3583 3584 /* insert into matrix */ 3585 jj = rstart; 3586 smycols = mycols; 3587 svals = vals; 3588 for (i=0; i<m; i++) { 3589 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3590 smycols += ourlens[i]; 3591 svals += ourlens[i]; 3592 jj++; 3593 } 3594 3595 /* read in other processors and ship out */ 3596 for (i=1; i<size; i++) { 3597 nz = procsnz[i]; 3598 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3599 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3600 } 3601 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3602 } else { 3603 /* receive numeric values */ 3604 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3605 3606 /* receive message of values*/ 3607 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3608 3609 /* insert into matrix */ 3610 jj = rstart; 3611 smycols = mycols; 3612 svals = vals; 3613 for (i=0; i<m; i++) { 3614 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3615 smycols += ourlens[i]; 3616 svals += ourlens[i]; 3617 jj++; 3618 } 3619 } 3620 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3621 ierr = PetscFree(vals);CHKERRQ(ierr); 3622 ierr = PetscFree(mycols);CHKERRQ(ierr); 3623 ierr = PetscFree(rowners);CHKERRQ(ierr); 3624 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3625 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3626 3627 PetscFunctionReturn(0); 3628 } 3629 3630 #undef __FUNCT__ 3631 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3632 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3633 { 3634 PetscErrorCode ierr; 3635 IS iscol_local; 3636 PetscInt csize; 3637 3638 PetscFunctionBegin; 3639 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3640 if (call == MAT_REUSE_MATRIX) { 3641 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3642 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3643 } else { 3644 PetscInt cbs; 3645 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3646 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3647 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3648 } 3649 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3650 if (call == MAT_INITIAL_MATRIX) { 3651 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3652 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3653 } 3654 PetscFunctionReturn(0); 3655 } 3656 3657 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3658 #undef __FUNCT__ 3659 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3660 /* 3661 Not great since it makes two copies of the submatrix, first an SeqAIJ 3662 in local and then by concatenating the local matrices the end result. 3663 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3664 3665 Note: This requires a sequential iscol with all indices. 
3666 */ 3667 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3668 { 3669 PetscErrorCode ierr; 3670 PetscMPIInt rank,size; 3671 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3672 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3673 PetscBool allcolumns, colflag; 3674 Mat M,Mreuse; 3675 MatScalar *vwork,*aa; 3676 MPI_Comm comm = ((PetscObject)mat)->comm; 3677 Mat_SeqAIJ *aij; 3678 3679 3680 PetscFunctionBegin; 3681 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3682 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3683 3684 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3685 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3686 if (colflag && ncol == mat->cmap->N) { 3687 allcolumns = PETSC_TRUE; 3688 } else { 3689 allcolumns = PETSC_FALSE; 3690 } 3691 if (call == MAT_REUSE_MATRIX) { 3692 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3693 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3694 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3695 } else { 3696 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3697 } 3698 3699 /* 3700 m - number of local rows 3701 n - number of columns (same on all processors) 3702 rstart - first row in new global matrix generated 3703 */ 3704 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3705 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3706 if (call == MAT_INITIAL_MATRIX) { 3707 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3708 ii = aij->i; 3709 jj = aij->j; 3710 3711 /* 3712 Determine the number of non-zeros in the diagonal and off-diagonal 3713 portions of the matrix in order to do correct preallocation 3714 */ 3715 3716 /* first get start and end of "diagonal" columns */ 3717 if (csize == PETSC_DECIDE) { 3718 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3719 if (mglobal == n) { /* square matrix */ 3720 nlocal = m; 3721 } else { 3722 nlocal = n/size + ((n % size) > rank); 3723 } 3724 } else { 3725 nlocal = csize; 3726 } 3727 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3728 rstart = rend - nlocal; 3729 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3730 3731 /* next, compute all the lengths */ 3732 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3733 olens = dlens + m; 3734 for (i=0; i<m; i++) { 3735 jend = ii[i+1] - ii[i]; 3736 olen = 0; 3737 dlen = 0; 3738 for (j=0; j<jend; j++) { 3739 if (*jj < rstart || *jj >= rend) olen++; 3740 else dlen++; 3741 jj++; 3742 } 3743 olens[i] = olen; 3744 dlens[i] = dlen; 3745 } 3746 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3747 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3748 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3749 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3750 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3751 ierr = PetscFree(dlens);CHKERRQ(ierr); 3752 } else { 3753 PetscInt ml,nl; 3754 3755 M = *newmat; 3756 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3757 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3758 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3759 /* 3760 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3761 rather than the slower MatSetValues(). 3762 */ 3763 M->was_assembled = PETSC_TRUE; 3764 M->assembled = PETSC_FALSE; 3765 } 3766 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3767 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3768 ii = aij->i; 3769 jj = aij->j; 3770 aa = aij->a; 3771 for (i=0; i<m; i++) { 3772 row = rstart + i; 3773 nz = ii[i+1] - ii[i]; 3774 cwork = jj; jj += nz; 3775 vwork = aa; aa += nz; 3776 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3777 } 3778 3779 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3780 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3781 *newmat = M; 3782 3783 /* save submatrix used in processor for next request */ 3784 if (call == MAT_INITIAL_MATRIX) { 3785 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3786 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3787 } 3788 3789 PetscFunctionReturn(0); 3790 } 3791 3792 EXTERN_C_BEGIN 3793 #undef __FUNCT__ 3794 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3795 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3796 { 3797 PetscInt m,cstart, cend,j,nnz,i,d; 3798 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3799 const PetscInt *JJ; 3800 PetscScalar *values; 3801 PetscErrorCode ierr; 3802 3803 PetscFunctionBegin; 3804 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3805 3806 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3807 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3808 m = B->rmap->n; 3809 cstart = B->cmap->rstart; 3810 cend = B->cmap->rend; 3811 rstart = B->rmap->rstart; 3812 3813 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3814 3815 #if defined(PETSC_USE_DEBUG) 3816 for (i=0; i<m; i++) { 3817 nnz = Ii[i+1]- Ii[i]; 3818 JJ = J + Ii[i]; 3819 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3820 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3821 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3822 } 3823 #endif 3824 3825 for (i=0; i<m; i++) { 3826 nnz = Ii[i+1]- Ii[i]; 3827 JJ = J + Ii[i]; 3828 nnz_max = PetscMax(nnz_max,nnz); 3829 d = 0; 3830 for (j=0; j<nnz; j++) { 3831 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3832 } 3833 d_nnz[i] = d; 3834 o_nnz[i] = nnz - d; 3835 } 3836 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3837 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3838 3839 if (v) values = (PetscScalar*)v; 3840 else { 3841 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3842 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3843 } 3844 3845 for (i=0; i<m; i++) { 3846 ii = i + rstart; 3847 nnz = Ii[i+1]- Ii[i]; 3848 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3849 } 3850 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3851 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3852 3853 if (!v) { 3854 ierr = PetscFree(values);CHKERRQ(ierr); 3855 } 3856 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3857 PetscFunctionReturn(0); 3858 } 3859 EXTERN_C_END 3860 3861 #undef __FUNCT__ 3862 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3863 /*@ 3864 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3865 (the default parallel PETSc format). 3866 3867 Collective on MPI_Comm 3868 3869 Input Parameters: 3870 + B - the matrix 3871 . i - the indices into j for the start of each local row (starts with zero) 3872 . j - the column indices for each local row (starts with zero) 3873 - v - optional values in the matrix 3874 3875 Level: developer 3876 3877 Notes: 3878 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3879 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3880 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3881 3882 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3883 3884 The format which is used for the sparse matrix input, is equivalent to a 3885 row-major ordering.. i.e for the following matrix, the input data expected is 3886 as shown: 3887 3888 1 0 0 3889 2 0 3 P0 3890 ------- 3891 4 5 6 P1 3892 3893 Process0 [P0]: rows_owned=[0,1] 3894 i = {0,1,3} [size = nrow+1 = 2+1] 3895 j = {0,0,2} [size = nz = 6] 3896 v = {1,2,3} [size = nz = 6] 3897 3898 Process1 [P1]: rows_owned=[2] 3899 i = {0,3} [size = nrow+1 = 1+1] 3900 j = {0,1,2} [size = nz = 6] 3901 v = {4,5,6} [size = nz = 6] 3902 3903 .keywords: matrix, aij, compressed row, sparse, parallel 3904 3905 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3906 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3907 @*/ 3908 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3909 { 3910 PetscErrorCode ierr; 3911 3912 PetscFunctionBegin; 3913 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3914 PetscFunctionReturn(0); 3915 } 3916 3917 #undef __FUNCT__ 3918 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3919 /*@C 3920 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3921 (the default parallel PETSc format). For good matrix assembly performance 3922 the user should preallocate the matrix storage by setting the parameters 3923 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3924 performance can be increased by more than a factor of 50. 3925 3926 Collective on MPI_Comm 3927 3928 Input Parameters: 3929 + A - the matrix 3930 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3931 (same value is used for all local rows) 3932 . d_nnz - array containing the number of nonzeros in the various rows of the 3933 DIAGONAL portion of the local submatrix (possibly different for each row) 3934 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3935 The size of this array is equal to the number of local rows, i.e 'm'. 
3936 For matrices that will be factored, you must leave room for (and set) 3937 the diagonal entry even if it is zero. 3938 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3939 submatrix (same value is used for all local rows). 3940 - o_nnz - array containing the number of nonzeros in the various rows of the 3941 OFF-DIAGONAL portion of the local submatrix (possibly different for 3942 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3943 structure. The size of this array is equal to the number 3944 of local rows, i.e 'm'. 3945 3946 If the *_nnz parameter is given then the *_nz parameter is ignored 3947 3948 The AIJ format (also called the Yale sparse matrix format or 3949 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3950 storage. The stored row and column indices begin with zero. 3951 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3952 3953 The parallel matrix is partitioned such that the first m0 rows belong to 3954 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3955 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3956 3957 The DIAGONAL portion of the local submatrix of a processor can be defined 3958 as the submatrix which is obtained by extracting the part corresponding to 3959 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3960 first row that belongs to the processor, r2 is the last row belonging to 3961 this processor, and c1-c2 is the range of indices of the local part of a 3962 vector suitable for applying the matrix to. This is an mxn matrix. In the 3963 common case of a square matrix, the row and column ranges are the same and 3964 the DIAGONAL part is also square. The remaining portion of the local 3965 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3966 3967 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3968 3969 You can call MatGetInfo() to get information on how effective the preallocation was; 3970 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3971 You can also run with the option -info and look for messages with the string 3972 malloc in them to see if additional memory allocation was needed. 3973 3974 Example usage: 3975 3976 Consider the following 8x8 matrix with 34 non-zero values, that is 3977 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3978 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3979 as follows: 3980 3981 .vb 3982 1 2 0 | 0 3 0 | 0 4 3983 Proc0 0 5 6 | 7 0 0 | 8 0 3984 9 0 10 | 11 0 0 | 12 0 3985 ------------------------------------- 3986 13 0 14 | 15 16 17 | 0 0 3987 Proc1 0 18 0 | 19 20 21 | 0 0 3988 0 0 0 | 22 23 0 | 24 0 3989 ------------------------------------- 3990 Proc2 25 26 27 | 0 0 28 | 29 0 3991 30 0 0 | 31 32 33 | 0 34 3992 .ve 3993 3994 This can be represented as a collection of submatrices as: 3995 3996 .vb 3997 A B C 3998 D E F 3999 G H I 4000 .ve 4001 4002 Where the submatrices A,B,C are owned by proc0, D,E,F are 4003 owned by proc1, G,H,I are owned by proc2. 4004 4005 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4006 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4007 The 'M','N' parameters are 8,8, and have the same values on all procs. 4008 4009 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4010 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 4011 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4012 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4013 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4014 matrix, and [DF] as another SeqAIJ matrix. 4015 4016 When d_nz, o_nz parameters are specified, d_nz storage elements are 4017 allocated for every row of the local diagonal submatrix, and o_nz 4018 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4019 One way to choose d_nz and o_nz is to use the max nonzeros per local 4020 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4021 In this case, the values of d_nz,o_nz are: 4022 .vb 4023 proc0 : d_nz = 2, o_nz = 2 4024 proc1 : d_nz = 3, o_nz = 2 4025 proc2 : d_nz = 1, o_nz = 4 4026 .ve 4027 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4028 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4029 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4030 34 values. 4031 4032 When d_nnz, o_nnz parameters are specified, the storage is specified 4033 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4034 In the above case the values for d_nnz,o_nnz are: 4035 .vb 4036 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4037 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4038 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4039 .ve 4040 Here the space allocated is the sum of all the above values, i.e. 34, and 4041 hence pre-allocation is perfect. 4042 4043 Level: intermediate 4044 4045 .keywords: matrix, aij, compressed row, sparse, parallel 4046 4047 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4048 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4049 @*/ 4050 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4051 { 4052 PetscErrorCode ierr; 4053 4054 PetscFunctionBegin; 4055 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4056 PetscValidType(B,1); 4057 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4058 PetscFunctionReturn(0); 4059 } 4060 4061 #undef __FUNCT__ 4062 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4063 /*@ 4064 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in 4065 standard CSR format. 4066 4067 Collective on MPI_Comm 4068 4069 Input Parameters: 4070 + comm - MPI communicator 4071 . m - number of local rows (Cannot be PETSC_DECIDE) 4072 . n - This value should be the same as the local size used in creating the 4073 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4074 calculated if N is given) For square matrices n is almost always m. 4075 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4076 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4077 . i - row indices 4078 . j - column indices 4079 - a - matrix values 4080 4081 Output Parameter: 4082 . mat - the matrix 4083 4084 Level: intermediate 4085 4086 Notes: 4087 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4088 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4089 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
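   A minimal calling sketch, using the process-0 data from the small example further below (error
   checking and cleanup omitted), is:
.vb
   PetscInt    i[] = {0,1,3};   /* row offsets for the 2 local rows */
   PetscInt    j[] = {0,0,2};   /* column indices                   */
   PetscScalar v[] = {1,2,3};   /* values                           */
   Mat         A;

   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve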
4090 4091 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4092 4093 The format which is used for the sparse matrix input, is equivalent to a 4094 row-major ordering.. i.e for the following matrix, the input data expected is 4095 as shown: 4096 4097 1 0 0 4098 2 0 3 P0 4099 ------- 4100 4 5 6 P1 4101 4102 Process0 [P0]: rows_owned=[0,1] 4103 i = {0,1,3} [size = nrow+1 = 2+1] 4104 j = {0,0,2} [size = nz = 6] 4105 v = {1,2,3} [size = nz = 6] 4106 4107 Process1 [P1]: rows_owned=[2] 4108 i = {0,3} [size = nrow+1 = 1+1] 4109 j = {0,1,2} [size = nz = 6] 4110 v = {4,5,6} [size = nz = 6] 4111 4112 .keywords: matrix, aij, compressed row, sparse, parallel 4113 4114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4115 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4116 @*/ 4117 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4118 { 4119 PetscErrorCode ierr; 4120 4121 PetscFunctionBegin; 4122 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4123 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4124 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4125 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4126 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4127 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4128 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4129 PetscFunctionReturn(0); 4130 } 4131 4132 #undef __FUNCT__ 4133 #define __FUNCT__ "MatCreateAIJ" 4134 /*@C 4135 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4136 (the default parallel PETSc format). For good matrix assembly performance 4137 the user should preallocate the matrix storage by setting the parameters 4138 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4139 performance can be increased by more than a factor of 50. 4140 4141 Collective on MPI_Comm 4142 4143 Input Parameters: 4144 + comm - MPI communicator 4145 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4146 This value should be the same as the local size used in creating the 4147 y vector for the matrix-vector product y = Ax. 4148 . n - This value should be the same as the local size used in creating the 4149 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4150 calculated if N is given) For square matrices n is almost always m. 4151 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4152 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4153 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4154 (same value is used for all local rows) 4155 . d_nnz - array containing the number of nonzeros in the various rows of the 4156 DIAGONAL portion of the local submatrix (possibly different for each row) 4157 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4158 The size of this array is equal to the number of local rows, i.e 'm'. 4159 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4160 submatrix (same value is used for all local rows). 
4161 - o_nnz - array containing the number of nonzeros in the various rows of the 4162 OFF-DIAGONAL portion of the local submatrix (possibly different for 4163 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4164 structure. The size of this array is equal to the number 4165 of local rows, i.e 'm'. 4166 4167 Output Parameter: 4168 . A - the matrix 4169 4170 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4171 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4172 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4173 4174 Notes: 4175 If the *_nnz parameter is given then the *_nz parameter is ignored 4176 4177 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4178 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4179 storage requirements for this matrix. 4180 4181 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4182 processor then it must be used on all processors that share the object for 4183 that argument. 4184 4185 The user MUST specify either the local or global matrix dimensions 4186 (possibly both). 4187 4188 The parallel matrix is partitioned across processors such that the 4189 first m0 rows belong to process 0, the next m1 rows belong to 4190 process 1, the next m2 rows belong to process 2 etc.. where 4191 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4192 values corresponding to [m x N] submatrix. 4193 4194 The columns are logically partitioned with the n0 columns belonging 4195 to 0th partition, the next n1 columns belonging to the next 4196 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4197 4198 The DIAGONAL portion of the local submatrix on any given processor 4199 is the submatrix corresponding to the rows and columns m,n 4200 corresponding to the given processor. i.e diagonal matrix on 4201 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4202 etc. The remaining portion of the local submatrix [m x (N-n)] 4203 constitutes the OFF-DIAGONAL portion. The example below better 4204 illustrates this concept. 4205 4206 For a square global matrix we define each processor's diagonal portion 4207 to be its local rows and the corresponding columns (a square submatrix); 4208 each processor's off-diagonal portion encompasses the remainder of the 4209 local matrix (a rectangular submatrix). 4210 4211 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4212 4213 When calling this routine with a single process communicator, a matrix of 4214 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4215 type of communicator, use the construction mechanism: 4216 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4217 4218 By default, this format uses inodes (identical nodes) when possible. 4219 We search for consecutive rows with the same nonzero structure, thereby 4220 reusing matrix information to achieve increased efficiency. 4221 4222 Options Database Keys: 4223 + -mat_no_inode - Do not use inodes 4224 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4225 - -mat_aij_oneindex - Internally use indexing starting at 1 4226 rather than 0. Note that when calling MatSetValues(), 4227 the user still MUST index entries starting at 0! 4228 4229 4230 Example usage: 4231 4232 Consider the following 8x8 matrix with 34 non-zero values, that is 4233 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4234 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4235 as follows: 4236 4237 .vb 4238 1 2 0 | 0 3 0 | 0 4 4239 Proc0 0 5 6 | 7 0 0 | 8 0 4240 9 0 10 | 11 0 0 | 12 0 4241 ------------------------------------- 4242 13 0 14 | 15 16 17 | 0 0 4243 Proc1 0 18 0 | 19 20 21 | 0 0 4244 0 0 0 | 22 23 0 | 24 0 4245 ------------------------------------- 4246 Proc2 25 26 27 | 0 0 28 | 29 0 4247 30 0 0 | 31 32 33 | 0 34 4248 .ve 4249 4250 This can be represented as a collection of submatrices as: 4251 4252 .vb 4253 A B C 4254 D E F 4255 G H I 4256 .ve 4257 4258 Where the submatrices A,B,C are owned by proc0, D,E,F are 4259 owned by proc1, G,H,I are owned by proc2. 4260 4261 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4262 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4263 The 'M','N' parameters are 8,8, and have the same values on all procs. 4264 4265 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4266 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4267 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4268 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4269 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4270 matrix, and [DF] as another SeqAIJ matrix. 4271 4272 When d_nz, o_nz parameters are specified, d_nz storage elements are 4273 allocated for every row of the local diagonal submatrix, and o_nz 4274 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4275 One way to choose d_nz and o_nz is to use the max nonzeros per local 4276 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4277 In this case, the values of d_nz,o_nz are: 4278 .vb 4279 proc0 : d_nz = 2, o_nz = 2 4280 proc1 : d_nz = 3, o_nz = 2 4281 proc2 : d_nz = 1, o_nz = 4 4282 .ve 4283 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4284 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4285 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4286 34 values. 4287 4288 When d_nnz, o_nnz parameters are specified, the storage is specified 4289 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4290 In the above case the values for d_nnz,o_nnz are: 4291 .vb 4292 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4293 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4294 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4295 .ve 4296 Here the space allocated is the sum of all the above values, i.e. 34, and 4297 hence pre-allocation is perfect.
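   A minimal calling sketch for proc1 of this example (error checking, the MatSetValues() calls and
   cleanup omitted; the d_nnz/o_nnz values are those listed above) would be:
.vb
   PetscInt d_nnz[] = {3,3,2};   /* per-row nonzeros of the DIAGONAL block [E]      */
   PetscInt o_nnz[] = {2,1,1};   /* per-row nonzeros of the OFF-DIAGONAL block [DF] */
   Mat      A;

   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve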
4298 4299 Level: intermediate 4300 4301 .keywords: matrix, aij, compressed row, sparse, parallel 4302 4303 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4304 MPIAIJ, MatCreateMPIAIJWithArrays() 4305 @*/ 4306 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4307 { 4308 PetscErrorCode ierr; 4309 PetscMPIInt size; 4310 4311 PetscFunctionBegin; 4312 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4313 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4314 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4315 if (size > 1) { 4316 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4317 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4318 } else { 4319 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4320 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4321 } 4322 PetscFunctionReturn(0); 4323 } 4324 4325 #undef __FUNCT__ 4326 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4327 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4328 { 4329 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4330 4331 PetscFunctionBegin; 4332 *Ad = a->A; 4333 *Ao = a->B; 4334 *colmap = a->garray; 4335 PetscFunctionReturn(0); 4336 } 4337 4338 #undef __FUNCT__ 4339 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4340 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4341 { 4342 PetscErrorCode ierr; 4343 PetscInt i; 4344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4345 4346 PetscFunctionBegin; 4347 if (coloring->ctype == IS_COLORING_GLOBAL) { 4348 ISColoringValue *allcolors,*colors; 4349 ISColoring ocoloring; 4350 4351 /* set coloring for diagonal portion */ 4352 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4353 4354 /* set coloring for off-diagonal portion */ 4355 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4356 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4357 for (i=0; i<a->B->cmap->n; i++) { 4358 colors[i] = allcolors[a->garray[i]]; 4359 } 4360 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4361 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4362 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4363 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4364 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4365 ISColoringValue *colors; 4366 PetscInt *larray; 4367 ISColoring ocoloring; 4368 4369 /* set coloring for diagonal portion */ 4370 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4371 for (i=0; i<a->A->cmap->n; i++) { 4372 larray[i] = i + A->cmap->rstart; 4373 } 4374 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4375 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4376 for (i=0; i<a->A->cmap->n; i++) { 4377 colors[i] = coloring->colors[larray[i]]; 4378 } 4379 ierr = PetscFree(larray);CHKERRQ(ierr); 4380 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4381 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4382 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4383 4384 /* set coloring for off-diagonal portion */ 4385 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4386 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4387 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4388 for (i=0; i<a->B->cmap->n; i++) { 4389 colors[i] = coloring->colors[larray[i]]; 4390 } 4391 ierr = PetscFree(larray);CHKERRQ(ierr); 4392 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4393 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4394 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4395 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4396 4397 PetscFunctionReturn(0); 4398 } 4399 4400 #undef __FUNCT__ 4401 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4402 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4403 { 4404 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4405 PetscErrorCode ierr; 4406 4407 PetscFunctionBegin; 4408 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4409 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 #undef __FUNCT__ 4414 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4415 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4416 { 4417 PetscErrorCode ierr; 4418 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4419 PetscInt *indx; 4420 4421 PetscFunctionBegin; 4422 /* This routine will ONLY return MPIAIJ type matrix */ 4423 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4424 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4425 if (n == PETSC_DECIDE) { 4426 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4427 } 4428 /* Check sum(n) = N */ 4429 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4430 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4431 4432 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4433 rstart -= m; 4434 4435 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4436 for (i=0;i<m;i++) { 4437 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4438 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4439 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4440 } 4441 4442 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4443 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4444 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4445 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4446 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4447 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4448 PetscFunctionReturn(0); 4449 } 4450 4451 #undef __FUNCT__ 4452 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4453 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4454 { 4455 PetscErrorCode ierr; 4456 PetscInt m,N,i,rstart,nnz,Ii; 4457 PetscInt *indx; 4458 PetscScalar *values; 4459 4460 PetscFunctionBegin; 4461 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4462 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4463 for (i=0;i<m;i++) { 4464 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4465 Ii = i + rstart; 4466 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4467 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4468 } 4469 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4470 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4471 PetscFunctionReturn(0); 4472 } 4473 4474 #undef __FUNCT__ 4475 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4476 /*@ 4477 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4478 matrices from each processor 4479 4480 Collective on MPI_Comm 4481 4482 Input Parameters: 4483 + comm - the communicators the parallel matrix will live on 4484 . inmat - the input sequential matrices 4485 . n - number of local columns (or PETSC_DECIDE) 4486 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4487 4488 Output Parameter: 4489 . outmat - the parallel matrix generated 4490 4491 Level: advanced 4492 4493 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4494 4495 @*/ 4496 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4497 { 4498 PetscErrorCode ierr; 4499 4500 PetscFunctionBegin; 4501 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4502 if (scall == MAT_INITIAL_MATRIX) { 4503 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4504 } 4505 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4506 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4507 PetscFunctionReturn(0); 4508 } 4509 4510 #undef __FUNCT__ 4511 #define __FUNCT__ "MatFileSplit" 4512 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4513 { 4514 PetscErrorCode ierr; 4515 PetscMPIInt rank; 4516 PetscInt m,N,i,rstart,nnz; 4517 size_t len; 4518 const PetscInt *indx; 4519 PetscViewer out; 4520 char *name; 4521 Mat B; 4522 const PetscScalar *values; 4523 4524 PetscFunctionBegin; 4525 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4526 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4527 /* Should this be the type of the diagonal block of A? 
*/ 4528 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4529 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4530 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4531 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4532 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4533 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4534 for (i=0;i<m;i++) { 4535 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4536 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4537 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4538 } 4539 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4540 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4541 4542 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4543 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4544 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4545 sprintf(name,"%s.%d",outfile,rank); 4546 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4547 ierr = PetscFree(name);CHKERRQ(ierr); 4548 ierr = MatView(B,out);CHKERRQ(ierr); 4549 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4550 ierr = MatDestroy(&B);CHKERRQ(ierr); 4551 PetscFunctionReturn(0); 4552 } 4553 4554 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4555 #undef __FUNCT__ 4556 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4557 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4558 { 4559 PetscErrorCode ierr; 4560 Mat_Merge_SeqsToMPI *merge; 4561 PetscContainer container; 4562 4563 PetscFunctionBegin; 4564 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4565 if (container) { 4566 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4567 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4568 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4579 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4580 ierr = PetscFree(merge);CHKERRQ(ierr); 4581 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4582 } 4583 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 #include <../src/mat/utils/freespace.h> 4588 #include <petscbt.h> 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4592 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4593 { 4594 PetscErrorCode ierr; 4595 MPI_Comm comm=((PetscObject)mpimat)->comm; 4596 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4597 PetscMPIInt size,rank,taga,*len_s; 4598 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4599 PetscInt proc,m; 4600 PetscInt **buf_ri,**buf_rj; 4601 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4602 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4603 MPI_Request *s_waits,*r_waits; 4604 MPI_Status *status; 4605 MatScalar *aa=a->a; 4606 MatScalar **abuf_r,*ba_i; 4607 Mat_Merge_SeqsToMPI *merge; 4608 PetscContainer container; 4609 4610 PetscFunctionBegin; 4611 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4612 4613 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4614 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4615 4616 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4617 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4618 4619 bi = merge->bi; 4620 bj = merge->bj; 4621 buf_ri = merge->buf_ri; 4622 buf_rj = merge->buf_rj; 4623 4624 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4625 owners = merge->rowmap->range; 4626 len_s = merge->len_s; 4627 4628 /* send and recv matrix values */ 4629 /*-----------------------------*/ 4630 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4631 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4632 4633 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4634 for (proc=0,k=0; proc<size; proc++) { 4635 if (!len_s[proc]) continue; 4636 i = owners[proc]; 4637 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4638 k++; 4639 } 4640 4641 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4642 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4643 ierr = PetscFree(status);CHKERRQ(ierr); 4644 4645 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4646 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4647 4648 /* insert mat values of mpimat */ 4649 /*----------------------------*/ 4650 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4651 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4652 4653 for (k=0; k<merge->nrecv; k++) { 4654 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4655 nrows = *(buf_ri_k[k]); 4656 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4657 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4658 } 4659 4660 /* set values of ba */ 4661 m = merge->rowmap->n; 4662 for (i=0; i<m; i++) { 4663 arow = owners[rank] + i; 4664 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4665 bnzi = bi[i+1] - bi[i]; 4666 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4667 4668 /* add local non-zero vals of this proc's seqmat into ba */ 4669 anzi = ai[arow+1] - ai[arow]; 4670 aj = a->j + ai[arow]; 4671 aa = a->a + ai[arow]; 4672 nextaj = 0; 4673 for (j=0; nextaj<anzi; j++) { 4674 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4675 ba_i[j] += aa[nextaj++]; 4676 } 4677 } 4678 4679 /* add received vals into ba */ 4680 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4681 /* i-th row */ 4682 if (i == *nextrow[k]) { 4683 anzi = *(nextai[k]+1) - *nextai[k]; 4684 aj = buf_rj[k] + *(nextai[k]); 4685 aa = abuf_r[k] + *(nextai[k]); 4686 nextaj = 0; 4687 for (j=0; nextaj<anzi; j++) { 4688 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4689 ba_i[j] += aa[nextaj++]; 4690 } 4691 } 4692 nextrow[k]++; nextai[k]++; 4693 } 4694 } 4695 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4696 } 4697 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4699 4700 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4701 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4702 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4703 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4704 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4705 PetscFunctionReturn(0); 4706 } 4707 4708 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4709 4710 #undef __FUNCT__ 4711 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4712 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4713 { 4714 PetscErrorCode ierr; 4715 Mat B_mpi; 4716 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4717 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4718 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4719 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4720 PetscInt len,proc,*dnz,*onz,bs,cbs; 4721 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4722 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4723 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4724 MPI_Status *status; 4725 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4726 PetscBT lnkbt; 4727 Mat_Merge_SeqsToMPI *merge; 4728 PetscContainer container; 4729 4730 PetscFunctionBegin; 4731 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4732 4733 /* make sure it is a PETSc comm */ 4734 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4735 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4736 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4737 4738 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4739 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4740 4741 /* determine row ownership */ 4742 /*---------------------------------------------------------*/ 4743 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4744 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4745 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4746 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4747 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4748 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4749 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4750 4751 m = merge->rowmap->n; 4752 owners = merge->rowmap->range; 4753 4754 /* determine the number of messages to send, their lengths */ 4755 /*---------------------------------------------------------*/ 4756 len_s = merge->len_s; 4757 4758 len = 0; /* length of buf_si[] */ 4759 merge->nsend = 0; 4760 for (proc=0; proc<size; proc++) { 4761 len_si[proc] = 0; 4762 if (proc == rank) { 4763 len_s[proc] = 0; 4764 } else { 4765 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4766 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4767 } 4768 if (len_s[proc]) { 4769 merge->nsend++; 4770 nrows = 0; 4771 for (i=owners[proc]; i<owners[proc+1]; i++) { 4772 if (ai[i+1] > ai[i]) nrows++; 4773 } 4774 len_si[proc] = 2*(nrows+1); 4775 len += len_si[proc]; 4776 } 4777 } 4778 4779 /* determine the number and length of messages to receive for ij-structure */ 4780 /*-------------------------------------------------------------------------*/ 4781 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4782 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4783 4784 /* post the Irecv of j-structure */ 4785 /*-------------------------------*/ 4786 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4787 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4788 4789 /* post the Isend of j-structure */ 4790 /*--------------------------------*/ 4791 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4792 4793 for (proc=0, k=0; proc<size; proc++) { 4794 if (!len_s[proc]) continue; 4795 i = owners[proc]; 4796 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4797 k++; 4798 } 4799 4800 /* receives and sends of j-structure are complete */ 4801 /*------------------------------------------------*/ 4802 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4803 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4804 4805 /* send and recv i-structure */ 4806 /*---------------------------*/ 4807 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4808 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4809 4810 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4811 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4812 for (proc=0,k=0; proc<size; proc++) { 4813 if (!len_s[proc]) continue; 4814 /* form outgoing message for i-structure: 4815 buf_si[0]: nrows to be sent 4816 [1:nrows]: row index (global) 4817 [nrows+1:2*nrows+1]: i-structure index 4818 */ 4819 /*-------------------------------------------*/ 4820 nrows = len_si[proc]/2 - 1; 4821 buf_si_i = buf_si + nrows+1; 4822 buf_si[0] = nrows; 4823 buf_si_i[0] = 0; 4824 nrows = 0; 4825 for (i=owners[proc]; i<owners[proc+1]; i++) { 4826 anzi = ai[i+1] - ai[i]; 4827 if (anzi) { 4828 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4829 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4830 nrows++; 4831 } 4832 } 4833 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4834 k++; 4835 buf_si += len_si[proc]; 4836 } 4837 4838 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4839 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4840 4841 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4842 for (i=0; i<merge->nrecv; i++) { 4843 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4844 } 4845 4846 ierr = PetscFree(len_si);CHKERRQ(ierr); 4847 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4848 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4849 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4850 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4851 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4852 ierr = PetscFree(status);CHKERRQ(ierr); 4853 4854 /* compute a local seq matrix in each processor */ 4855 /*----------------------------------------------*/ 4856 /* allocate bi array and free space for accumulating nonzero column info */ 4857 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4858 bi[0] = 0; 4859 4860 /* create and initialize a linked list */ 4861 nlnk = N+1; 4862 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4863 4864 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4865 len = ai[owners[rank+1]] - ai[owners[rank]]; 4866 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4867 current_space = free_space; 4868 4869 /* determine symbolic info 
for each local row */ 4870 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4871 4872 for (k=0; k<merge->nrecv; k++) { 4873 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4874 nrows = *buf_ri_k[k]; 4875 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4876 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4877 } 4878 4879 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4880 len = 0; 4881 for (i=0;i<m;i++) { 4882 bnzi = 0; 4883 /* add local non-zero cols of this proc's seqmat into lnk */ 4884 arow = owners[rank] + i; 4885 anzi = ai[arow+1] - ai[arow]; 4886 aj = a->j + ai[arow]; 4887 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4888 bnzi += nlnk; 4889 /* add received col data into lnk */ 4890 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4891 if (i == *nextrow[k]) { /* i-th row */ 4892 anzi = *(nextai[k]+1) - *nextai[k]; 4893 aj = buf_rj[k] + *nextai[k]; 4894 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4895 bnzi += nlnk; 4896 nextrow[k]++; nextai[k]++; 4897 } 4898 } 4899 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4900 4901 /* if free space is not available, make more free space */ 4902 if (current_space->local_remaining<bnzi) { 4903 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4904 nspacedouble++; 4905 } 4906 /* copy data into free space, then initialize lnk */ 4907 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4908 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4909 4910 current_space->array += bnzi; 4911 current_space->local_used += bnzi; 4912 current_space->local_remaining -= bnzi; 4913 4914 bi[i+1] = bi[i] + bnzi; 4915 } 4916 4917 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4918 4919 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4920 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4921 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4922 4923 /* create symbolic parallel matrix B_mpi */ 4924 /*---------------------------------------*/ 4925 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4926 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4927 if (n==PETSC_DECIDE) { 4928 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4929 } else { 4930 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4931 } 4932 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4933 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4934 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4935 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4936 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4937 4938 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4939 B_mpi->assembled = PETSC_FALSE; 4940 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4941 merge->bi = bi; 4942 merge->bj = bj; 4943 merge->buf_ri = buf_ri; 4944 merge->buf_rj = buf_rj; 4945 merge->coi = PETSC_NULL; 4946 merge->coj = PETSC_NULL; 4947 merge->owners_co = PETSC_NULL; 4948 4949 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4950 4951 /* attach the supporting struct to B_mpi for reuse */ 4952 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4953 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4954 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4955 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4956 *mpimat = B_mpi; 4957 4958 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4964 /*@C 4965 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4966 matrices from each processor 4967 4968 Collective on MPI_Comm 4969 4970 Input Parameters: 4971 + comm - the communicators the parallel matrix will live on 4972 . seqmat - the input sequential matrices 4973 . m - number of local rows (or PETSC_DECIDE) 4974 . n - number of local columns (or PETSC_DECIDE) 4975 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4976 4977 Output Parameter: 4978 . mpimat - the parallel matrix generated 4979 4980 Level: advanced 4981 4982 Notes: 4983 The dimensions of the sequential matrix in each processor MUST be the same. 4984 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4985 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4986 @*/ 4987 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4988 { 4989 PetscErrorCode ierr; 4990 PetscMPIInt size; 4991 4992 PetscFunctionBegin; 4993 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4994 if (size == 1) { 4995 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4996 if (scall == MAT_INITIAL_MATRIX) { 4997 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4998 } else { 4999 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5000 } 5001 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5002 PetscFunctionReturn(0); 5003 } 5004 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5005 if (scall == MAT_INITIAL_MATRIX) { 5006 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5007 } 5008 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5009 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5010 PetscFunctionReturn(0); 5011 } 5012 5013 #undef __FUNCT__ 5014 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5015 /*@ 5016 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5017 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5018 with MatGetSize() 5019 5020 Not Collective 5021 5022 Input Parameters: 5023 + A - the matrix 5024 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5025 5026 Output Parameter: 5027 . 
A_loc - the local sequential matrix generated 5028 5029 Level: developer 5030 5031 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5032 5033 @*/ 5034 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5035 { 5036 PetscErrorCode ierr; 5037 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5038 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5039 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5040 MatScalar *aa=a->a,*ba=b->a,*cam; 5041 PetscScalar *ca; 5042 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5043 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5044 PetscBool match; 5045 5046 PetscFunctionBegin; 5047 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5048 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5049 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5050 if (scall == MAT_INITIAL_MATRIX) { 5051 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5052 ci[0] = 0; 5053 for (i=0; i<am; i++) { 5054 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5055 } 5056 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5057 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5058 k = 0; 5059 for (i=0; i<am; i++) { 5060 ncols_o = bi[i+1] - bi[i]; 5061 ncols_d = ai[i+1] - ai[i]; 5062 /* off-diagonal portion of A */ 5063 for (jo=0; jo<ncols_o; jo++) { 5064 col = cmap[*bj]; 5065 if (col >= cstart) break; 5066 cj[k] = col; bj++; 5067 ca[k++] = *ba++; 5068 } 5069 /* diagonal portion of A */ 5070 for (j=0; j<ncols_d; j++) { 5071 cj[k] = cstart + *aj++; 5072 ca[k++] = *aa++; 5073 } 5074 /* off-diagonal portion of A */ 5075 for (j=jo; j<ncols_o; j++) { 5076 cj[k] = cmap[*bj++]; 5077 ca[k++] = *ba++; 5078 } 5079 } 5080 /* put together the new matrix */ 5081 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5082 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5083 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5084 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5085 mat->free_a = PETSC_TRUE; 5086 mat->free_ij = PETSC_TRUE; 5087 mat->nonew = 0; 5088 } else if (scall == MAT_REUSE_MATRIX) { 5089 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5090 ci = mat->i; cj = mat->j; cam = mat->a; 5091 for (i=0; i<am; i++) { 5092 /* off-diagonal portion of A */ 5093 ncols_o = bi[i+1] - bi[i]; 5094 for (jo=0; jo<ncols_o; jo++) { 5095 col = cmap[*bj]; 5096 if (col >= cstart) break; 5097 *cam++ = *ba++; bj++; 5098 } 5099 /* diagonal portion of A */ 5100 ncols_d = ai[i+1] - ai[i]; 5101 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5102 /* off-diagonal portion of A */ 5103 for (j=jo; j<ncols_o; j++) { 5104 *cam++ = *ba++; bj++; 5105 } 5106 } 5107 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5108 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5109 PetscFunctionReturn(0); 5110 } 5111 5112 #undef __FUNCT__ 5113 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5114 /*@C 5115 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5116 5117 Not Collective 5118 5119 Input Parameters: 5120 + A - the matrix 5121 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5122 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5123 5124 Output Parameter: 5125 . 
A_loc - the local sequential matrix generated 5126 5127 Level: developer 5128 5129 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5130 5131 @*/ 5132 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5133 { 5134 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5135 PetscErrorCode ierr; 5136 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5137 IS isrowa,iscola; 5138 Mat *aloc; 5139 PetscBool match; 5140 5141 PetscFunctionBegin; 5142 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5143 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5144 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5145 if (!row) { 5146 start = A->rmap->rstart; end = A->rmap->rend; 5147 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5148 } else { 5149 isrowa = *row; 5150 } 5151 if (!col) { 5152 start = A->cmap->rstart; 5153 cmap = a->garray; 5154 nzA = a->A->cmap->n; 5155 nzB = a->B->cmap->n; 5156 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5157 ncols = 0; 5158 for (i=0; i<nzB; i++) { 5159 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5160 else break; 5161 } 5162 imark = i; 5163 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5164 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5165 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5166 } else { 5167 iscola = *col; 5168 } 5169 if (scall != MAT_INITIAL_MATRIX) { 5170 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5171 aloc[0] = *A_loc; 5172 } 5173 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5174 *A_loc = aloc[0]; 5175 ierr = PetscFree(aloc);CHKERRQ(ierr); 5176 if (!row) { 5177 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5178 } 5179 if (!col) { 5180 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5181 } 5182 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5183 PetscFunctionReturn(0); 5184 } 5185 5186 #undef __FUNCT__ 5187 #define __FUNCT__ "MatGetBrowsOfAcols" 5188 /*@C 5189 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5190 5191 Collective on Mat 5192 5193 Input Parameters: 5194 + A,B - the matrices in mpiaij format 5195 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5196 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5197 5198 Output Parameter: 5199 + rowb, colb - index sets of rows and columns of B to extract 5200 - B_seq - the sequential matrix generated 5201 5202 Level: developer 5203 5204 @*/ 5205 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5206 { 5207 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5208 PetscErrorCode ierr; 5209 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5210 IS isrowb,iscolb; 5211 Mat *bseq=PETSC_NULL; 5212 5213 PetscFunctionBegin; 5214 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5215 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5216 } 5217 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5218 5219 if (scall == MAT_INITIAL_MATRIX) { 5220 start = A->cmap->rstart; 5221 cmap = a->garray; 5222 nzA = a->A->cmap->n; 5223 nzB = a->B->cmap->n; 5224 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5225 ncols = 0; 5226 for (i=0; i<nzB; i++) { /* row < local row index */ 5227 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5228 else break; 5229 } 5230 imark = i; 5231 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5232 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5233 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5234 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5235 } else { 5236 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5237 isrowb = *rowb; iscolb = *colb; 5238 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5239 bseq[0] = *B_seq; 5240 } 5241 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5242 *B_seq = bseq[0]; 5243 ierr = PetscFree(bseq);CHKERRQ(ierr); 5244 if (!rowb) { 5245 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5246 } else { 5247 *rowb = isrowb; 5248 } 5249 if (!colb) { 5250 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5251 } else { 5252 *colb = iscolb; 5253 } 5254 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5255 PetscFunctionReturn(0); 5256 } 5257 5258 #undef __FUNCT__ 5259 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5260 /* 5261 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5262 of the OFF-DIAGONAL portion of local A 5263 5264 Collective on Mat 5265 5266 Input Parameters: 5267 + A,B - the matrices in mpiaij format 5268 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5269 5270 Output Parameter: 5271 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5272 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5273 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5274 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5275 5276 Level: developer 5277 5278 */ 5279 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5280 { 5281 VecScatter_MPI_General *gen_to,*gen_from; 5282 PetscErrorCode ierr; 5283 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5284 Mat_SeqAIJ *b_oth; 5285 VecScatter ctx=a->Mvctx; 5286 MPI_Comm comm=((PetscObject)ctx)->comm; 5287 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5288 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5289 PetscScalar *rvalues,*svalues; 5290 MatScalar *b_otha,*bufa,*bufA; 5291 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5292 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5293 MPI_Status *sstatus,rstatus; 5294 PetscMPIInt jj; 5295 PetscInt *cols,sbs,rbs; 5296 PetscScalar *vals; 5297 5298 PetscFunctionBegin; 5299 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5300 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5301 } 5302 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5303 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5304 5305 gen_to = (VecScatter_MPI_General*)ctx->todata; 5306 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5307 rvalues = gen_from->values; /* holds the length of receiving row */ 5308 svalues = gen_to->values; /* holds the length of sending row */ 5309 nrecvs = gen_from->n; 5310 nsends = gen_to->n; 5311 5312 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5313 srow = gen_to->indices; /* local row index to be sent */ 5314 sstarts = gen_to->starts; 5315 sprocs = gen_to->procs; 5316 sstatus = gen_to->sstatus; 5317 sbs = gen_to->bs; 5318 rstarts = gen_from->starts; 5319 rprocs = gen_from->procs; 5320 rbs = gen_from->bs; 5321 5322 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5323 if (scall == MAT_INITIAL_MATRIX) { 5324 /* i-array */ 5325 /*---------*/ 5326 /* post receives */ 5327 for (i=0; i<nrecvs; i++) { 5328 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5329 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5330 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5331 } 5332 5333 /* pack the outgoing message */ 5334 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5335 sstartsj[0] = 0; rstartsj[0] = 0; 5336 len = 0; /* total length of j or a array to be sent */ 5337 k = 0; 5338 for (i=0; i<nsends; i++) { 5339 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5340 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5341 for (j=0; j<nrows; j++) { 5342 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5343 for (l=0; l<sbs; l++) { 5344 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5345 rowlen[j*sbs+l] = ncols; 5346 len += ncols; 5347 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5348 } 5349 k++; 5350 } 5351 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5352 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5353 } 5354 
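  /*
     Overview of the communication pattern in the MAT_INITIAL_MATRIX branch: three exchanges are
     performed over the same send/receive pattern obtained from the Mvctx scatter of A:
       (1) row lengths      - the i-structure, whose sends were just posted and complete below,
       (2) column indices   - the j-array, packed with MatGetRow_MPIAIJ() further down,
       (3) numerical values - the a-array, exchanged after this branch for both INITIAL and REUSE.
     The offset arrays sstartsj and rstartsj computed here are returned through startsj_s and
     startsj_r (when provided) so that a MAT_REUSE_MATRIX call can skip steps (1) and (2) and only
     re-exchange the values.
  */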
/* recvs and sends of i-array are completed */ 5355 i = nrecvs; 5356 while (i--) { 5357 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5358 } 5359 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5360 5361 /* allocate buffers for sending j and a arrays */ 5362 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5363 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5364 5365 /* create i-array of B_oth */ 5366 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5367 b_othi[0] = 0; 5368 len = 0; /* total length of j or a array to be received */ 5369 k = 0; 5370 for (i=0; i<nrecvs; i++) { 5371 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5372 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5373 for (j=0; j<nrows; j++) { 5374 b_othi[k+1] = b_othi[k] + rowlen[j]; 5375 len += rowlen[j]; k++; 5376 } 5377 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5378 } 5379 5380 /* allocate space for j and a arrrays of B_oth */ 5381 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5382 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5383 5384 /* j-array */ 5385 /*---------*/ 5386 /* post receives of j-array */ 5387 for (i=0; i<nrecvs; i++) { 5388 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5389 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5390 } 5391 5392 /* pack the outgoing message j-array */ 5393 k = 0; 5394 for (i=0; i<nsends; i++) { 5395 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5396 bufJ = bufj+sstartsj[i]; 5397 for (j=0; j<nrows; j++) { 5398 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5399 for (ll=0; ll<sbs; ll++) { 5400 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5401 for (l=0; l<ncols; l++) { 5402 *bufJ++ = cols[l]; 5403 } 5404 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5405 } 5406 } 5407 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5408 } 5409 5410 /* recvs and sends of j-array are completed */ 5411 i = nrecvs; 5412 while (i--) { 5413 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5414 } 5415 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5416 } else if (scall == MAT_REUSE_MATRIX) { 5417 sstartsj = *startsj_s; 5418 rstartsj = *startsj_r; 5419 bufa = *bufa_ptr; 5420 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5421 b_otha = b_oth->a; 5422 } else { 5423 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5424 } 5425 5426 /* a-array */ 5427 /*---------*/ 5428 /* post receives of a-array */ 5429 for (i=0; i<nrecvs; i++) { 5430 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5431 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5432 } 5433 5434 /* pack the outgoing message a-array */ 5435 k = 0; 5436 for (i=0; i<nsends; i++) { 5437 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5438 bufA = bufa+sstartsj[i]; 5439 for (j=0; j<nrows; j++) { 5440 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5441 for (ll=0; ll<sbs; ll++) { 5442 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5443 for (l=0; l<ncols; l++) { 5444 *bufA++ = vals[l]; 5445 } 5446 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5447 } 5448 } 5449 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5450 } 5451 /* recvs and sends of a-array are completed */ 5452 i = nrecvs; 5453 while (i--) { 5454 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5455 } 5456 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5457 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5458 5459 if (scall == MAT_INITIAL_MATRIX) { 5460 /* put together the new matrix */ 5461 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5462 5463 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5464 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5465 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5466 b_oth->free_a = PETSC_TRUE; 5467 b_oth->free_ij = PETSC_TRUE; 5468 b_oth->nonew = 0; 5469 5470 ierr = PetscFree(bufj);CHKERRQ(ierr); 5471 if (!startsj_s || !bufa_ptr) { 5472 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5473 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5474 } else { 5475 *startsj_s = sstartsj; 5476 *startsj_r = rstartsj; 5477 *bufa_ptr = bufa; 5478 } 5479 } 5480 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5481 PetscFunctionReturn(0); 5482 } 5483 5484 #undef __FUNCT__ 5485 #define __FUNCT__ "MatGetCommunicationStructs" 5486 /*@C 5487 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5488 5489 Not Collective 5490 5491 Input Parameters: 5492 . A - The matrix in mpiaij format 5493 5494 Output Parameter: 5495 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5496 . 
colmap - A map from global column index to local index into lvec 5497 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5498 5499 Level: developer 5500 5501 @*/ 5502 #if defined (PETSC_USE_CTABLE) 5503 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5504 #else 5505 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5506 #endif 5507 { 5508 Mat_MPIAIJ *a; 5509 5510 PetscFunctionBegin; 5511 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5512 PetscValidPointer(lvec, 2); 5513 PetscValidPointer(colmap, 3); 5514 PetscValidPointer(multScatter, 4); 5515 a = (Mat_MPIAIJ *) A->data; 5516 if (lvec) *lvec = a->lvec; 5517 if (colmap) *colmap = a->colmap; 5518 if (multScatter) *multScatter = a->Mvctx; 5519 PetscFunctionReturn(0); 5520 } 5521 5522 EXTERN_C_BEGIN 5523 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5524 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5525 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5526 EXTERN_C_END 5527 5528 #undef __FUNCT__ 5529 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5530 /* 5531 Computes (B'*A')' since computing B*A directly is untenable 5532 5533 n p p 5534 ( ) ( ) ( ) 5535 m ( A ) * n ( B ) = m ( C ) 5536 ( ) ( ) ( ) 5537 5538 */ 5539 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5540 { 5541 PetscErrorCode ierr; 5542 Mat At,Bt,Ct; 5543 5544 PetscFunctionBegin; 5545 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5546 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5547 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5548 ierr = MatDestroy(&At);CHKERRQ(ierr); 5549 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5550 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5551 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5552 PetscFunctionReturn(0); 5553 } 5554 5555 #undef __FUNCT__ 5556 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5557 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5558 { 5559 PetscErrorCode ierr; 5560 PetscInt m=A->rmap->n,n=B->cmap->n; 5561 Mat Cmat; 5562 5563 PetscFunctionBegin; 5564 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5565 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5566 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5567 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5568 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5569 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5570 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5571 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5572 5573 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5574 *C = Cmat; 5575 PetscFunctionReturn(0); 5576 } 5577 5578 /* ----------------------------------------------------------------*/ 5579 #undef __FUNCT__ 5580 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5581 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5582 { 5583 PetscErrorCode ierr; 5584 5585 PetscFunctionBegin; 5586 if (scall == MAT_INITIAL_MATRIX) { 5587 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5588 } 5589 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
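  /* Note: the numeric routine above forms C = A*B as (B^T * A^T)^T, so every call creates and
     destroys the two transposed factors and an intermediate product; this keeps the implementation
     simple at the cost of extra copies. */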
5590 PetscFunctionReturn(0); 5591 } 5592 5593 EXTERN_C_BEGIN 5594 #if defined(PETSC_HAVE_MUMPS) 5595 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5596 #endif 5597 #if defined(PETSC_HAVE_PASTIX) 5598 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5599 #endif 5600 #if defined(PETSC_HAVE_SUPERLU_DIST) 5601 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5602 #endif 5603 #if defined(PETSC_HAVE_CLIQUE) 5604 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5605 #endif 5606 EXTERN_C_END 5607 5608 /*MC 5609 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5610 5611 Options Database Keys: 5612 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5613 5614 Level: beginner 5615 5616 .seealso: MatCreateAIJ() 5617 M*/ 5618 5619 EXTERN_C_BEGIN 5620 #undef __FUNCT__ 5621 #define __FUNCT__ "MatCreate_MPIAIJ" 5622 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5623 { 5624 Mat_MPIAIJ *b; 5625 PetscErrorCode ierr; 5626 PetscMPIInt size; 5627 5628 PetscFunctionBegin; 5629 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5630 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5631 B->data = (void*)b; 5632 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5633 B->assembled = PETSC_FALSE; 5634 B->insertmode = NOT_SET_VALUES; 5635 b->size = size; 5636 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5637 5638 /* build cache for off array entries formed */ 5639 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5640 b->donotstash = PETSC_FALSE; 5641 b->colmap = 0; 5642 b->garray = 0; 5643 b->roworiented = PETSC_TRUE; 5644 5645 /* stuff used for matrix vector multiply */ 5646 b->lvec = PETSC_NULL; 5647 b->Mvctx = PETSC_NULL; 5648 5649 /* stuff for MatGetRow() */ 5650 b->rowindices = 0; 5651 b->rowvalues = 0; 5652 b->getrowactive = PETSC_FALSE; 5653 5654 /* flexible pointer used in CUSP/CUSPARSE classes */ 5655 b->spptr = PETSC_NULL; 5656 5657 #if defined(PETSC_HAVE_MUMPS) 5658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5659 "MatGetFactor_aij_mumps", 5660 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5661 #endif 5662 #if defined(PETSC_HAVE_PASTIX) 5663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5664 "MatGetFactor_mpiaij_pastix", 5665 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5666 #endif 5667 #if defined(PETSC_HAVE_SUPERLU_DIST) 5668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5669 "MatGetFactor_mpiaij_superlu_dist", 5670 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5671 #endif 5672 #if defined(PETSC_HAVE_CLIQUE) 5673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5674 "MatGetFactor_aij_clique", 5675 MatGetFactor_aij_clique);CHKERRQ(ierr); 5676 #endif 5677 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5678 "MatStoreValues_MPIAIJ", 5679 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5680 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5681 "MatRetrieveValues_MPIAIJ", 5682 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5684 "MatGetDiagonalBlock_MPIAIJ", 5685 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5686 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5687 "MatIsTranspose_MPIAIJ", 5688 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5690 "MatMPIAIJSetPreallocation_MPIAIJ", 5691 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5692 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5693 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5694 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5696 "MatDiagonalScaleLocal_MPIAIJ", 5697 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5699 "MatConvert_MPIAIJ_MPIAIJPERM", 5700 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5701 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5702 "MatConvert_MPIAIJ_MPIAIJCRL", 5703 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5704 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5705 "MatConvert_MPIAIJ_MPISBAIJ", 5706 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5707 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5708 "MatMatMult_MPIDense_MPIAIJ", 5709 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5710 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5711 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5712 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5713 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5714 "MatMatMultNumeric_MPIDense_MPIAIJ", 5715 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5716 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5717 PetscFunctionReturn(0); 5718 } 5719 EXTERN_C_END 5720 5721 #undef __FUNCT__ 5722 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5723 /*@ 5724 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5725 and "off-diagonal" part of the matrix in CSR format. 5726 5727 Collective on MPI_Comm 5728 5729 Input Parameters: 5730 + comm - MPI communicator 5731 . m - number of local rows (Cannot be PETSC_DECIDE) 5732 . n - This value should be the same as the local size used in creating the 5733 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5734 calculated if N is given) For square matrices n is almost always m. 5735 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5736 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5737 . i - row indices for "diagonal" portion of matrix 5738 . j - column indices 5739 . a - matrix values 5740 . oi - row indices for "off-diagonal" portion of matrix 5741 . oj - column indices 5742 - oa - matrix values 5743 5744 Output Parameter: 5745 . mat - the matrix 5746 5747 Level: advanced 5748 5749 Notes: 5750 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5751 must free the arrays once the matrix has been destroyed and not before. 5752 5753 The i and j indices are 0 based 5754 5755 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5756 5757 This sets local rows and cannot be used to set off-processor values. 
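    For illustration only (the sizes and values below are hypothetical, not taken from the PETSc
    manual pages): a 4x4 tridiagonal matrix distributed over two processes, each owning two rows and
    two columns, could be built as follows. The j indices are local to the "diagonal" block, while
    the oj indices are global column indices, matching the global column count used when the
    off-diagonal block is created below.

    On rank 0 (global rows 0 and 1):
       PetscInt    i[]  = {0,2,4},  j[]  = {0,1,0,1},  oi[] = {0,0,1},  oj[] = {2};
       PetscScalar a[]  = {2.0,-1.0,-1.0,2.0},  oa[] = {-1.0};
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
    with analogous arrays on rank 1 (global rows 2 and 3).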
5758 5759 You cannot later use MatSetValues() to change values in this matrix. 5760 5761 .keywords: matrix, aij, compressed row, sparse, parallel 5762 5763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5764 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5765 @*/ 5766 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5767 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5768 { 5769 PetscErrorCode ierr; 5770 Mat_MPIAIJ *maij; 5771 5772 PetscFunctionBegin; 5773 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5774 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5775 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5776 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5777 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5778 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5779 maij = (Mat_MPIAIJ*) (*mat)->data; 5780 maij->donotstash = PETSC_TRUE; 5781 (*mat)->preallocated = PETSC_TRUE; 5782 5783 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5784 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5785 5786 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5787 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5788 5789 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5790 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5791 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5792 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5793 5794 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5795 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5796 PetscFunctionReturn(0); 5797 } 5798 5799 /* 5800 Special version for direct calls from Fortran 5801 */ 5802 #include <petsc-private/fortranimpl.h> 5803 5804 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5805 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5806 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5807 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5808 #endif 5809 5810 /* Change these macros so can be used in void function */ 5811 #undef CHKERRQ 5812 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5813 #undef SETERRQ2 5814 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5815 #undef SETERRQ3 5816 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5817 #undef SETERRQ 5818 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5819 5820 EXTERN_C_BEGIN 5821 #undef __FUNCT__ 5822 #define __FUNCT__ "matsetvaluesmpiaij_" 5823 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5824 { 5825 Mat mat = *mmat; 5826 PetscInt m = *mm, n = *mn; 5827 InsertMode addv = *maddv; 5828 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5829 PetscScalar value; 5830 PetscErrorCode ierr; 5831 5832 MatCheckPreallocated(mat,1); 5833 if (mat->insertmode == NOT_SET_VALUES) { 5834 mat->insertmode = addv; 5835 } 5836 #if defined(PETSC_USE_DEBUG) 5837 else if (mat->insertmode != addv) { 5838 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5839 } 5840 #endif 5841 { 5842 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5843 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5844 PetscBool roworiented = aij->roworiented; 5845 5846 /* Some Variables required in the macro */ 5847 Mat A = aij->A; 5848 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5849 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5850 MatScalar *aa = a->a; 5851 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5852 Mat B = aij->B; 5853 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5854 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5855 MatScalar *ba = b->a; 5856 5857 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5858 PetscInt nonew = a->nonew; 5859 MatScalar *ap1,*ap2; 5860 5861 PetscFunctionBegin; 5862 for (i=0; i<m; i++) { 5863 if (im[i] < 0) continue; 5864 #if defined(PETSC_USE_DEBUG) 5865 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5866 #endif 5867 if (im[i] >= rstart && im[i] < rend) { 5868 row = im[i] - rstart; 5869 lastcol1 = -1; 5870 rp1 = aj + ai[row]; 5871 ap1 = aa + ai[row]; 5872 rmax1 = aimax[row]; 5873 nrow1 = ailen[row]; 5874 low1 = 0; 5875 high1 = nrow1; 5876 lastcol2 = -1; 5877 rp2 = bj + bi[row]; 5878 ap2 = ba + bi[row]; 5879 rmax2 = bimax[row]; 5880 nrow2 = bilen[row]; 5881 low2 = 0; 5882 high2 = nrow2; 5883 5884 for (j=0; j<n; j++) { 5885 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5886 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5887 if (in[j] >= cstart && in[j] < cend) { 5888 col = in[j] - cstart; 5889 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5890 } else if (in[j] < 0) continue; 5891 #if defined(PETSC_USE_DEBUG) 5892 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5893 #endif 5894 else { 5895 if (mat->was_assembled) { 5896 if (!aij->colmap) { 5897 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5898 } 5899 #if defined (PETSC_USE_CTABLE) 5900 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5901 col--; 5902 #else 5903 col = aij->colmap[in[j]] - 1; 5904 #endif 5905 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5906 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5907 col = in[j]; 5908 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5909 B = aij->B; 5910 b = (Mat_SeqAIJ*)B->data; 5911 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5912 rp2 = bj + bi[row]; 5913 ap2 = ba + bi[row]; 5914 rmax2 = bimax[row]; 5915 nrow2 = bilen[row]; 5916 low2 = 0; 5917 high2 = nrow2; 5918 bm = aij->B->rmap->n; 5919 ba = b->a; 5920 } 5921 } else col = in[j]; 5922 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5923 } 5924 } 5925 } else { 5926 if (!aij->donotstash) { 5927 if (roworiented) { 5928 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5929 } else { 5930 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5931 } 5932 } 5933 } 5934 }} 5935 PetscFunctionReturnVoid(); 5936 } 5937 EXTERN_C_END 5938 5939
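
/*
   Illustrative usage sketch for MatCreateMPIAIJSumSeqAIJ() defined above (the names and sizes here
   are hypothetical, not taken from the PETSc examples): every process assembles its contributions
   into a sequential matrix with the GLOBAL dimensions M x N, using global indices, and the
   per-process contributions are then summed into a single parallel MPIAIJ matrix.

      Mat            seqA,A;
      PetscErrorCode ierr;

      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,maxnz,PETSC_NULL,&seqA);CHKERRQ(ierr);
      (fill seqA with this process's contributions, using global row and column indices)
      ierr = MatAssemblyBegin(seqA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(seqA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,
                                      MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
*/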