#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
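/*
   Example (a minimal sketch, not part of the library): creating an AIJ matrix and
   calling both preallocation routines, as the manpages above recommend.  The sizes
   (m, n) and the per-row nonzero estimates (5 diagonal, 2 off-diagonal) are
   illustrative assumptions only.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);               used on one process
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);  used on several processes
*/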
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
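/*
   The routine below computes column norms in two phases: each process accumulates
   the contributions of its diagonal (A) and off-diagonal (B) blocks into a work
   array of full global column length (B's compressed column indices are mapped
   through garray), then a single MPI_Allreduce (MPI_MAX for NORM_INFINITY, MPI_SUM
   otherwise) combines the per-process results; for NORM_2 the square root is taken
   at the end.
*/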
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices
*/
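/*
  On MAT_INITIAL_MATRIX, rank 0 sends each other rank three messages: its row
  lengths, its column indices, and its numerical values; every rank then counts
  diagonal versus off-diagonal entries so it can preallocate before inserting.
  On reuse, only the numerical values are shipped and copied directly into the
  A (diagonal) and B (off-diagonal) parts using the ld[] counts recorded on the
  first pass.
*/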
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
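/*
  The two macros below insert one value into the diagonal (A) or off-diagonal (B)
  part of a row stored in sorted column order.  They cache the previous column
  (lastcol1/lastcol2) to narrow the search window, binary-search while the window
  has more than five entries, then scan linearly; an existing entry is added to or
  overwritten, and a new entry shifts the tail of the row up, reallocating with
  MatSeqXAIJReallocateAIJ() if the row is full.  They rely on variables
  (rp1, ap1, low1, high1, ...) declared by the calling function.
*/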
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else                high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else                high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
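/*
  MatSetValuesRow_MPIAIJ() below replaces an entire locally owned row at once.  The
  incoming array v is ordered by global column: the entries left of the diagonal
  block land in B, then the diagonal-block (A) entries, then the remaining B entries.
*/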
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
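/*
  Assembly of off-process entries: values destined for rows owned by other
  processes are kept in mat->stash by MatSetValues_MPIAIJ() above.
  MatAssemblyBegin_MPIAIJ() first checks that every process used the same insert
  mode, then starts scattering the stashed entries to their owners;
  MatAssemblyEnd_MPIAIJ() receives them and applies them with MatSetValues_MPIAIJ().
*/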
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
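/*
  MatZeroRows_MPIAIJ() and MatZeroRowsColumns_MPIAIJ() below share the same
  communication pattern: each process determines the owner of every row it was
  asked to zero, sends those row numbers to the owners (PetscMaxSum() sizes the
  receives), and then zeros the locally owned rows it was told about.
*/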
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
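/*
  The multiply routines below overlap communication with computation: the scatter
  of the needed off-process entries of x into a->lvec is started, the local
  (diagonal block) product is computed, the scatter is completed, and finally the
  off-diagonal block contribution B*lvec is added.
*/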
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
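/*
  For the transpose product the dataflow is reversed: B^T*xx produces contributions
  destined for rows owned by other processes, which are accumulated into yy with a
  reverse scatter using ADD_VALUES; whether the local product is done before or
  after starting that scatter depends on whether the scatter's begin and end
  phases were merged.
*/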
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
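/*
  MatIsTranspose_MPIAIJ() below tests in two stages: first the cheap local check
  that each diagonal block is the transpose of its counterpart, then (on more than
  one process) a comparison of the off-process columns of A against the off-process
  rows of B obtained through MatGetSubMatrices().
*/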
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
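/*
  Binary viewer format written below: a four-entry header (MAT_FILE_CLASSID, global
  rows, global columns, total nonzeros), then all row lengths, then all column
  indices, then all numerical values.  Process 0 writes its own data and receives
  each other process's contribution in turn, throttled by the viewer flow control.
*/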
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);
  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1497 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1498 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1499 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1500 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1503 PetscInt inodecount,inodelimit,*inodes; 1504 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1505 if (inodes) { 1506 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1507 } else { 1508 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1512 PetscFunctionReturn(0); 1513 } 1514 } else if (isbinary) { 1515 if (size == 1) { 1516 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1517 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1518 } else { 1519 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } else if (isdraw) { 1523 PetscDraw draw; 1524 PetscBool isnull; 1525 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1526 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1527 } 1528 1529 if (size == 1) { 1530 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1531 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1532 } else { 1533 /* assemble the entire matrix onto first processor. 
  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* a work vector is needed unless this is a single sweep with a zero initial guess and no Eisenstat */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
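    /*
       Eisenstat's trick (a sketch of the algebra as orientation for the calls
       below, not a formal statement): with the local splitting A = L + D + U,
       the backward sweep above produced xx from bb; the code that follows
       forms bb1 = bb + ((omega-2)/omega)*D*xx + B*xloc, where xloc holds the
       scattered off-process values of xx, then finishes with a forward sweep
       on bb1 and accumulates the correction xx1 into xx.  This applies the
       SSOR-preconditioned operator at roughly the cost of a single sweep.
    */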
    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
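/*
   Computes B = A(rowp,colp).  The permutations are supplied as index sets on
   the locally owned rows and columns; PetscSF reductions are used to invert
   them in parallel, i.e. to find the destination row/column of every locally
   owned one, before the permuted matrix is preallocated and filled with
   MatSetValues().
*/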
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = PETSC_NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
  ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
    ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
    rowlen = bi[i+1] - bi[i];
    for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
    ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
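/*
   The info of the parallel matrix is assembled from its two sequential
   blocks: the counts of the diagonal part A and the off-diagonal part B are
   summed locally, and for MAT_GLOBAL_MAX/MAT_GLOBAL_SUM the local sums are
   combined across the communicator with an MPI_Allreduce().
*/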
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
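/*
   Returns one locally owned row in global numbering.  The entries of the
   diagonal block (columns shifted by cstart) and of the off-diagonal block
   (columns mapped through garray) are merged so the caller sees a single row
   sorted by increasing global column; workspace sized for the longest local
   row is allocated on first use and kept until the matrix is destroyed.
*/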
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
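/*
   The norms are assembled from the two sequential blocks:
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2); the local sums of squares are
                     combined with an allreduce before the square root,
     NORM_1:         max_j sum_i |a_ij|; the per-column sums are added across
                     processes, then the maximum is taken,
     NORM_INFINITY:  max_i sum_j |a_ij|; the per-row sums are local, so only
                     the maximum requires a reduction.
*/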
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
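/*
   The transpose is preallocated before it is filled: counts over the columns
   of the diagonal block give d_nnz directly, while the counts over the
   (compressed) columns of the off-diagonal block are delivered to the owners
   of the corresponding global columns with a PetscSF reduction to obtain
   o_nnz.  Each local row of A is then inserted as a column of the transpose
   with MatSetValues().
*/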
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
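/*
   Computes mat = diag(ll)*mat*diag(rr).  The scatter of rr into the ghost
   vector lvec is begun first, so that the left scaling of the off-diagonal
   block and the scaling of the diagonal block overlap the communication; the
   right scaling of the off-diagonal block runs after the scatter completes.
*/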
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts.  If need be, a copy more efficient than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices. */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt       i,m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
  const PetscInt *xi = x->i,*yi = y->i;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
    const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
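/*
   Y = a*X + Y in three regimes: for SAME_NONZERO_PATTERN the value arrays of
   the diagonal and off-diagonal blocks are combined directly with BLAS axpy;
   for SUBSET_NONZERO_PATTERN an x-to-y index map is built (and cached on the
   off-diagonal block) so every entry of X lands in its slot of Y; otherwise
   a matrix with the union of the two nonzero patterns is preallocated,
   filled, and swapped in for Y with MatHeaderReplace().
*/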
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x   = (Mat_SeqAIJ*)xx->A->data;
    y   = (Mat_SeqAIJ*)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ*)xx->B->data;
    y   = (Mat_SeqAIJ*)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PBGL)

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)fact, "graph", (PetscObject)c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A, "graph", (PetscObject*)&c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void**)&lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
                 ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
               vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);

  /* restore the array access obtained above */
  ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
  ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscMPIInt    *send_rank,*recv_rank;
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*Destroy)(Mat);
} Mat_Redundant;
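/*
   A Mat_Redundant is attached to the redundant matrix through a
   PetscContainer composed under the key "Mat_Redundant", so that the process
   lists and communication buffers can be reused on a MAT_REUSE_MATRIX call;
   the matrix's destroy routine is temporarily replaced by
   MatDestroy_MatRedundant() below, which detaches the container before
   calling the original destroy.
*/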
#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++) {
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
  A->ops->destroy = redund->Destroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  if (A->ops->destroy) {
    ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
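/*
   Builds, on each subcommunicator, a redundant copy of the entire parallel
   matrix.  Every process packs its local rows in CSR form (row pointers and
   column indices in sbuf_j, values in sbuf_a), exchanges them with the
   processes holding the same position in the other subcommunicators, and
   then inserts both its own rows and the received rows into the copy C.
*/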
Wrong local size"); 2613 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2614 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2615 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2616 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2617 2618 nsends = redund->nsends; 2619 nrecvs = redund->nrecvs; 2620 send_rank = redund->send_rank; 2621 recv_rank = redund->recv_rank; 2622 sbuf_nz = redund->sbuf_nz; 2623 rbuf_nz = redund->rbuf_nz; 2624 sbuf_j = redund->sbuf_j; 2625 sbuf_a = redund->sbuf_a; 2626 rbuf_j = redund->rbuf_j; 2627 rbuf_a = redund->rbuf_a; 2628 } 2629 2630 if (reuse == MAT_INITIAL_MATRIX) { 2631 PetscMPIInt subrank,subsize; 2632 PetscInt nleftover,np_subcomm; 2633 /* get the destination processors' id send_rank, nsends and nrecvs */ 2634 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2635 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2636 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2637 np_subcomm = size/nsubcomm; 2638 nleftover = size - nsubcomm*np_subcomm; 2639 nsends = 0; nrecvs = 0; 2640 for (i=0; i<size; i++) { /* i=rank*/ 2641 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2642 send_rank[nsends] = i; nsends++; 2643 recv_rank[nrecvs++] = i; 2644 } 2645 } 2646 if (rank >= size - nleftover) {/* this proc is a leftover processor */ 2647 i = size-nleftover-1; 2648 j = 0; 2649 while (j < nsubcomm - nleftover) { 2650 send_rank[nsends++] = i; 2651 i--; j++; 2652 } 2653 } 2654 2655 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2656 for (i=0; i<nleftover; i++) { 2657 recv_rank[nrecvs++] = size-nleftover+i; 2658 } 2659 } 2660 2661 /* allocate sbuf_j, sbuf_a */ 2662 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2663 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2664 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2665 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2666 2667 /* copy mat's local entries into the buffers */ 2668 if (reuse == MAT_INITIAL_MATRIX) { 2669 rownz_max = 0; 2670 rptr = sbuf_j; 2671 cols = sbuf_j + rend-rstart + 1; 2672 vals = sbuf_a; 2673 rptr[0] = 0; 2674 for (i=0; i<rend-rstart; i++) { 2675 row = i + rstart; 2676 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2677 ncols = nzA + nzB; 2678 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2679 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2680 /* load the column indices for this row into cols */ 2681 lwrite = 0; 2682 for (l=0; l<nzB; l++) { 2683 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2684 vals[lwrite] = aworkB[l]; 2685 cols[lwrite++] = ctmp; 2686 } 2687 } 2688 for (l=0; l<nzA; l++) { 2689 vals[lwrite] = aworkA[l]; 2690 cols[lwrite++] = cstart + cworkA[l]; 2691 } 2692 for (l=0; l<nzB; l++) { 2693 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2694 vals[lwrite] = aworkB[l]; 2695 cols[lwrite++] = ctmp; 2696 } 2697 } 2698 vals += ncols; 2699 cols += ncols; 2700 rptr[i+1] = rptr[i] + ncols; 2701 if (rownz_max < ncols) rownz_max = ncols; 2702 } 2703 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2704 } else { /* only copy matrix values into sbuf_a */ 2705 rptr = sbuf_j; 2706 vals = sbuf_a; 2707 rptr[0] = 0; 2708 for (i=0; 
  } else { /* only copy matrix values into sbuf_a */
    rptr    = sbuf_j;
    vals    = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      nzA   = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i];
      aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals      += ncols;
      rptr[i+1]  = rptr[i] + ncols;
    }
  } /* end of copying the local entries into the send buffers */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr     = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr     = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++) {
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++) {
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++) {
      j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2786 count--; 2787 } 2788 /* wait on sends of mat->i,j */ 2789 /*---------------------------*/ 2790 if (nsends) { 2791 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2792 } 2793 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2794 2795 /* post receives, send and receive mat->a */ 2796 /*----------------------------------------*/ 2797 for (imdex=0; imdex<nrecvs; imdex++) { 2798 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2799 } 2800 for (i=0; i<nsends; i++) { 2801 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2802 } 2803 count = nrecvs; 2804 while (count) { 2805 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2806 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2807 count--; 2808 } 2809 if (nsends) { 2810 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2811 } 2812 2813 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2814 2815 /* create redundant matrix */ 2816 /*-------------------------*/ 2817 if (reuse == MAT_INITIAL_MATRIX) { 2818 /* compute rownz_max for preallocation */ 2819 for (imdex=0; imdex<nrecvs; imdex++) { 2820 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2821 rptr = rbuf_j[imdex]; 2822 for (i=0; i<j; i++) { 2823 ncols = rptr[i+1] - rptr[i]; 2824 if (rownz_max < ncols) rownz_max = ncols; 2825 } 2826 } 2827 2828 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2829 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2830 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2831 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2832 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2833 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2834 } else { 2835 C = *matredundant; 2836 } 2837 2838 /* insert local matrix entries */ 2839 rptr = sbuf_j; 2840 cols = sbuf_j + rend-rstart + 1; 2841 vals = sbuf_a; 2842 for (i=0; i<rend-rstart; i++) { 2843 row = i + rstart; 2844 ncols = rptr[i+1] - rptr[i]; 2845 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2846 vals += ncols; 2847 cols += ncols; 2848 } 2849 /* insert received matrix entries */ 2850 for (imdex=0; imdex<nrecvs; imdex++) { 2851 rstart = rowrange[recv_rank[imdex]]; 2852 rend = rowrange[recv_rank[imdex]+1]; 2853 rptr = rbuf_j[imdex]; 2854 cols = rbuf_j[imdex] + rend-rstart + 1; 2855 vals = rbuf_a[imdex]; 2856 for (i=0; i<rend-rstart; i++) { 2857 row = i + rstart; 2858 ncols = rptr[i+1] - rptr[i]; 2859 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2860 vals += ncols; 2861 cols += ncols; 2862 } 2863 } 2864 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2865 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2866 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2867 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2868 if (reuse == MAT_INITIAL_MATRIX) { 2869 PetscContainer container; 2870 *matredundant = C; 2871 /* create a supporting struct and attach it to C for reuse */ 2872 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2873 ierr = 
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->Destroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() above */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
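/*
   The row minima are computed independently on the diagonal and off-diagonal
   blocks and merged row by row; note that the merge compares magnitudes
   (PetscAbsScalar), mirroring MatGetRowMax_MPIAIJ() below, and that the
   winning column index is mapped back to global numbering via cstart or
   garray.
*/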
A->data; 2982 PetscInt n = A->rmap->n; 2983 PetscInt cstart = A->cmap->rstart; 2984 PetscInt *cmap = mat->garray; 2985 PetscInt *diagIdx, *offdiagIdx; 2986 Vec diagV, offdiagV; 2987 PetscScalar *a, *diagA, *offdiagA; 2988 PetscInt r; 2989 PetscErrorCode ierr; 2990 2991 PetscFunctionBegin; 2992 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2993 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2994 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2995 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2996 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2997 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2998 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2999 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3000 for (r = 0; r < n; ++r) { 3001 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3002 a[r] = diagA[r]; 3003 idx[r] = cstart + diagIdx[r]; 3004 } else { 3005 a[r] = offdiagA[r]; 3006 idx[r] = cmap[offdiagIdx[r]]; 3007 } 3008 } 3009 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3010 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3011 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3012 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3013 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3014 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3015 PetscFunctionReturn(0); 3016 } 3017 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3020 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3021 { 3022 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3023 PetscInt n = A->rmap->n; 3024 PetscInt cstart = A->cmap->rstart; 3025 PetscInt *cmap = mat->garray; 3026 PetscInt *diagIdx, *offdiagIdx; 3027 Vec diagV, offdiagV; 3028 PetscScalar *a, *diagA, *offdiagA; 3029 PetscInt r; 3030 PetscErrorCode ierr; 3031 3032 PetscFunctionBegin; 3033 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3034 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3035 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3036 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3037 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3038 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3039 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3040 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3041 for (r = 0; r < n; ++r) { 3042 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3043 a[r] = diagA[r]; 3044 idx[r] = cstart + diagIdx[r]; 3045 } else { 3046 a[r] = offdiagA[r]; 3047 idx[r] = cmap[offdiagIdx[r]]; 3048 } 3049 } 3050 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3051 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3052 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3053 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3054 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3055 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3056 PetscFunctionReturn(0); 3057 } 3058 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3061 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3062 { 3063 PetscErrorCode ierr; 3064 Mat *dummy; 3065 3066 PetscFunctionBegin; 3067 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3068 *newmat = *dummy; 3069 ierr = PetscFree(dummy);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 extern PetscErrorCode 
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3074 3075 #undef __FUNCT__ 3076 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3077 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3078 { 3079 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3080 PetscErrorCode ierr; 3081 3082 PetscFunctionBegin; 3083 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3084 PetscFunctionReturn(0); 3085 } 3086 3087 #undef __FUNCT__ 3088 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3089 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3090 { 3091 PetscErrorCode ierr; 3092 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3093 3094 PetscFunctionBegin; 3095 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3096 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3097 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3098 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3099 PetscFunctionReturn(0); 3100 } 3101 3102 /* -------------------------------------------------------------------*/ 3103 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3104 MatGetRow_MPIAIJ, 3105 MatRestoreRow_MPIAIJ, 3106 MatMult_MPIAIJ, 3107 /* 4*/ MatMultAdd_MPIAIJ, 3108 MatMultTranspose_MPIAIJ, 3109 MatMultTransposeAdd_MPIAIJ, 3110 #if defined(PETSC_HAVE_PBGL) 3111 MatSolve_MPIAIJ, 3112 #else 3113 0, 3114 #endif 3115 0, 3116 0, 3117 /*10*/ 0, 3118 0, 3119 0, 3120 MatSOR_MPIAIJ, 3121 MatTranspose_MPIAIJ, 3122 /*15*/ MatGetInfo_MPIAIJ, 3123 MatEqual_MPIAIJ, 3124 MatGetDiagonal_MPIAIJ, 3125 MatDiagonalScale_MPIAIJ, 3126 MatNorm_MPIAIJ, 3127 /*20*/ MatAssemblyBegin_MPIAIJ, 3128 MatAssemblyEnd_MPIAIJ, 3129 MatSetOption_MPIAIJ, 3130 MatZeroEntries_MPIAIJ, 3131 /*24*/ MatZeroRows_MPIAIJ, 3132 0, 3133 #if defined(PETSC_HAVE_PBGL) 3134 0, 3135 #else 3136 0, 3137 #endif 3138 0, 3139 0, 3140 /*29*/ MatSetUp_MPIAIJ, 3141 #if defined(PETSC_HAVE_PBGL) 3142 0, 3143 #else 3144 0, 3145 #endif 3146 0, 3147 0, 3148 0, 3149 /*34*/ MatDuplicate_MPIAIJ, 3150 0, 3151 0, 3152 0, 3153 0, 3154 /*39*/ MatAXPY_MPIAIJ, 3155 MatGetSubMatrices_MPIAIJ, 3156 MatIncreaseOverlap_MPIAIJ, 3157 MatGetValues_MPIAIJ, 3158 MatCopy_MPIAIJ, 3159 /*44*/ MatGetRowMax_MPIAIJ, 3160 MatScale_MPIAIJ, 3161 0, 3162 0, 3163 MatZeroRowsColumns_MPIAIJ, 3164 /*49*/ MatSetRandom_MPIAIJ, 3165 0, 3166 0, 3167 0, 3168 0, 3169 /*54*/ MatFDColoringCreate_MPIAIJ, 3170 0, 3171 MatSetUnfactored_MPIAIJ, 3172 MatPermute_MPIAIJ, 3173 0, 3174 /*59*/ MatGetSubMatrix_MPIAIJ, 3175 MatDestroy_MPIAIJ, 3176 MatView_MPIAIJ, 3177 0, 3178 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3179 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3180 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3181 0, 3182 0, 3183 0, 3184 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3185 MatGetRowMinAbs_MPIAIJ, 3186 0, 3187 MatSetColoring_MPIAIJ, 3188 0, 3189 MatSetValuesAdifor_MPIAIJ, 3190 /*75*/ MatFDColoringApply_AIJ, 3191 0, 3192 0, 3193 0, 3194 MatFindZeroDiagonals_MPIAIJ, 3195 /*80*/ 0, 3196 0, 3197 0, 3198 /*83*/ MatLoad_MPIAIJ, 3199 0, 3200 0, 3201 0, 3202 0, 3203 0, 3204 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3205 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3206 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3207 MatPtAP_MPIAIJ_MPIAIJ, 3208 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3209 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3210 0, 3211 0, 3212 0, 3213 0, 3214 /*99*/ 0, 3215 0, 3216 0, 3217 MatConjugate_MPIAIJ, 3218 0, 3219 /*104*/MatSetValuesRow_MPIAIJ, 3220 MatRealPart_MPIAIJ, 3221 MatImaginaryPart_MPIAIJ, 3222 0, 3223 0, 3224 /*109*/0, 3225 MatGetRedundantMatrix_MPIAIJ, 3226 MatGetRowMin_MPIAIJ, 3227 0, 3228 0, 
3229 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3230 0, 3231 0, 3232 0, 3233 0, 3234 /*119*/0, 3235 0, 3236 0, 3237 0, 3238 MatGetMultiProcBlock_MPIAIJ, 3239 /*124*/MatFindNonzeroRows_MPIAIJ, 3240 MatGetColumnNorms_MPIAIJ, 3241 MatInvertBlockDiagonal_MPIAIJ, 3242 0, 3243 MatGetSubMatricesParallel_MPIAIJ, 3244 /*129*/0, 3245 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3246 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3247 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3248 0, 3249 /*134*/0, 3250 0, 3251 0, 3252 0, 3253 0 3254 }; 3255 3256 /* ----------------------------------------------------------------------------------------*/ 3257 3258 EXTERN_C_BEGIN 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3261 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3262 { 3263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3264 PetscErrorCode ierr; 3265 3266 PetscFunctionBegin; 3267 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3268 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3269 PetscFunctionReturn(0); 3270 } 3271 EXTERN_C_END 3272 3273 EXTERN_C_BEGIN 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3276 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3277 { 3278 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3279 PetscErrorCode ierr; 3280 3281 PetscFunctionBegin; 3282 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3283 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3284 PetscFunctionReturn(0); 3285 } 3286 EXTERN_C_END 3287 3288 EXTERN_C_BEGIN 3289 #undef __FUNCT__ 3290 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3291 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3292 { 3293 Mat_MPIAIJ *b; 3294 PetscErrorCode ierr; 3295 PetscInt i; 3296 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3297 3298 PetscFunctionBegin; 3299 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3300 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3301 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3302 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3303 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3304 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3305 3306 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3307 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3308 if (d_nnz) { 3309 for (i=0; i<B->rmap->n; i++) { 3310 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3311 } 3312 } 3313 if (o_nnz) { 3314 for (i=0; i<B->rmap->n; i++) { 3315 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3316 } 3317 } 3318 b = (Mat_MPIAIJ*)B->data; 3319 3320 if (!B->preallocated) { 3321 /* Explicitly create 2 MATSEQAIJ matrices. 
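       b->A holds the "diagonal" block (local rows by local columns) and b->B holds the
       "off-diagonal" block; B is created here with the full global column count, and its
       column space is compacted to just the globally numbered columns it actually
       references (recorded in garray) when the matrix is assembled.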
*/ 3322 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3323 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3324 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3325 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3326 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3327 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3328 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3329 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3330 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3331 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3332 } 3333 3334 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3335 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3336 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3337 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3338 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3339 B->preallocated = PETSC_TRUE; 3340 PetscFunctionReturn(0); 3341 } 3342 EXTERN_C_END 3343 3344 #undef __FUNCT__ 3345 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3346 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3347 { 3348 Mat mat; 3349 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3350 PetscErrorCode ierr; 3351 3352 PetscFunctionBegin; 3353 *newmat = 0; 3354 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3355 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3356 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3357 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3358 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3359 a = (Mat_MPIAIJ*)mat->data; 3360 3361 mat->factortype = matin->factortype; 3362 mat->rmap->bs = matin->rmap->bs; 3363 mat->cmap->bs = matin->cmap->bs; 3364 mat->assembled = PETSC_TRUE; 3365 mat->insertmode = NOT_SET_VALUES; 3366 mat->preallocated = PETSC_TRUE; 3367 3368 a->size = oldmat->size; 3369 a->rank = oldmat->rank; 3370 a->donotstash = oldmat->donotstash; 3371 a->roworiented = oldmat->roworiented; 3372 a->rowindices = 0; 3373 a->rowvalues = 0; 3374 a->getrowactive = PETSC_FALSE; 3375 3376 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3377 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3378 3379 if (oldmat->colmap) { 3380 #if defined (PETSC_USE_CTABLE) 3381 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3382 #else 3383 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3384 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3385 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3386 #endif 3387 } else a->colmap = 0; 3388 if (oldmat->garray) { 3389 PetscInt len; 3390 len = oldmat->B->cmap->n; 3391 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3392 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3393 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3394 } else a->garray = 0; 3395 3396 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3397 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3398 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3399 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3400 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3401 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3402 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3403 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3404 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3405 *newmat = mat; 3406 PetscFunctionReturn(0); 3407 } 3408 3409 3410 3411 #undef __FUNCT__ 3412 #define __FUNCT__ "MatLoad_MPIAIJ" 3413 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3414 { 3415 PetscScalar *vals,*svals; 3416 MPI_Comm comm = ((PetscObject)viewer)->comm; 3417 PetscErrorCode ierr; 3418 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3419 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3420 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3421 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3422 PetscInt cend,cstart,n,*rowners,sizesset=1; 3423 int fd; 3424 PetscInt bs = 1; 3425 3426 PetscFunctionBegin; 3427 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3428 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3429 if (!rank) { 3430 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3431 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3432 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3433 } 3434 3435 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3436 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3437 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3438 3439 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3440 3441 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3442 M = header[1]; N = header[2]; 3443 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3444 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3445 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3446 3447 /* If global sizes are set, check if they are consistent with that given in the file */ 3448 if (sizesset) { 3449 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3450 } 3451 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3452 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3453 3454 /* determine ownership of all (block) rows */ 3455 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3456 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3457 else m = newMat->rmap->n; /* Set by user */ 3458 3459 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3460 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3461 3462 /* First process needs enough room for process with most rows */ 3463 if (!rank) { 3464 mmax = rowners[1]; 3465 for (i=2; i<=size; i++) { 
3466 mmax = PetscMax(mmax, rowners[i]); 3467 } 3468 } else mmax = m; 3469 3470 rowners[0] = 0; 3471 for (i=2; i<=size; i++) { 3472 rowners[i] += rowners[i-1]; 3473 } 3474 rstart = rowners[rank]; 3475 rend = rowners[rank+1]; 3476 3477 /* distribute row lengths to all processors */ 3478 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3479 if (!rank) { 3480 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3481 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3482 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3483 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3484 for (j=0; j<m; j++) { 3485 procsnz[0] += ourlens[j]; 3486 } 3487 for (i=1; i<size; i++) { 3488 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3489 /* calculate the number of nonzeros on each processor */ 3490 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3491 procsnz[i] += rowlengths[j]; 3492 } 3493 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3494 } 3495 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3496 } else { 3497 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3498 } 3499 3500 if (!rank) { 3501 /* determine max buffer needed and allocate it */ 3502 maxnz = 0; 3503 for (i=0; i<size; i++) { 3504 maxnz = PetscMax(maxnz,procsnz[i]); 3505 } 3506 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3507 3508 /* read in my part of the matrix column indices */ 3509 nz = procsnz[0]; 3510 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3511 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3512 3513 /* read in every one elses and ship off */ 3514 for (i=1; i<size; i++) { 3515 nz = procsnz[i]; 3516 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3517 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3518 } 3519 ierr = PetscFree(cols);CHKERRQ(ierr); 3520 } else { 3521 /* determine buffer space needed for message */ 3522 nz = 0; 3523 for (i=0; i<m; i++) { 3524 nz += ourlens[i]; 3525 } 3526 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3527 3528 /* receive message of column indices*/ 3529 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3530 } 3531 3532 /* determine column ownership if matrix is not square */ 3533 if (N != M) { 3534 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3535 else n = newMat->cmap->n; 3536 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3537 cstart = cend - n; 3538 } else { 3539 cstart = rstart; 3540 cend = rend; 3541 n = cend - cstart; 3542 } 3543 3544 /* loop over local rows, determining number of off diagonal entries */ 3545 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3546 jj = 0; 3547 for (i=0; i<m; i++) { 3548 for (j=0; j<ourlens[i]; j++) { 3549 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3550 jj++; 3551 } 3552 } 3553 3554 for (i=0; i<m; i++) { 3555 ourlens[i] -= offlens[i]; 3556 } 3557 if (!sizesset) { 3558 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3559 } 3560 3561 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3562 3563 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3564 3565 for (i=0; i<m; i++) { 3566 ourlens[i] += offlens[i]; 3567 } 3568 3569 if (!rank) { 3570 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3571 3572 /* read in my part of the matrix numerical values 
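       (only rank 0 holds the file descriptor: it reads its own block of values here, then reads
       each remaining process's block in turn and forwards it with MPIULong_Send(); the other
       ranks receive theirs with MPIULong_Recv() in the else branch below)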
*/ 3573 nz = procsnz[0]; 3574 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3575 3576 /* insert into matrix */ 3577 jj = rstart; 3578 smycols = mycols; 3579 svals = vals; 3580 for (i=0; i<m; i++) { 3581 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3582 smycols += ourlens[i]; 3583 svals += ourlens[i]; 3584 jj++; 3585 } 3586 3587 /* read in other processors and ship out */ 3588 for (i=1; i<size; i++) { 3589 nz = procsnz[i]; 3590 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3591 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3592 } 3593 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3594 } else { 3595 /* receive numeric values */ 3596 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3597 3598 /* receive message of values*/ 3599 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3600 3601 /* insert into matrix */ 3602 jj = rstart; 3603 smycols = mycols; 3604 svals = vals; 3605 for (i=0; i<m; i++) { 3606 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3607 smycols += ourlens[i]; 3608 svals += ourlens[i]; 3609 jj++; 3610 } 3611 } 3612 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3613 ierr = PetscFree(vals);CHKERRQ(ierr); 3614 ierr = PetscFree(mycols);CHKERRQ(ierr); 3615 ierr = PetscFree(rowners);CHKERRQ(ierr); 3616 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3617 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3618 3619 PetscFunctionReturn(0); 3620 } 3621 3622 #undef __FUNCT__ 3623 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3624 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3625 { 3626 PetscErrorCode ierr; 3627 IS iscol_local; 3628 PetscInt csize; 3629 3630 PetscFunctionBegin; 3631 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3632 if (call == MAT_REUSE_MATRIX) { 3633 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3634 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3635 } else { 3636 PetscInt cbs; 3637 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3638 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3639 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3640 } 3641 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3642 if (call == MAT_INITIAL_MATRIX) { 3643 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3644 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3645 } 3646 PetscFunctionReturn(0); 3647 } 3648 3649 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3650 #undef __FUNCT__ 3651 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3652 /* 3653 Not great since it makes two copies of the submatrix, first an SeqAIJ 3654 in local and then by concatenating the local matrices the end result. 3655 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3656 3657 Note: This requires a sequential iscol with all indices. 
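   A typical caller-side sequence is sketched below (illustrative only; nlocalrows, firstrow,
   nlocalcols and firstcol are placeholder values, and ierr is the usual PetscErrorCode variable):

      IS  isrow,iscol;
      Mat sub;
      ierr = ISCreateStride(((PetscObject)mat)->comm,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(((PetscObject)mat)->comm,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

   MatGetSubMatrix_MPIAIJ() above gathers the distributed iscol onto every process with
   ISAllGather() before calling this routine, which is how the sequential-iscol requirement
   is satisfied.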
3658 */ 3659 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3660 { 3661 PetscErrorCode ierr; 3662 PetscMPIInt rank,size; 3663 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3664 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3665 PetscBool allcolumns, colflag; 3666 Mat M,Mreuse; 3667 MatScalar *vwork,*aa; 3668 MPI_Comm comm = ((PetscObject)mat)->comm; 3669 Mat_SeqAIJ *aij; 3670 3671 3672 PetscFunctionBegin; 3673 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3674 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3675 3676 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3677 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3678 if (colflag && ncol == mat->cmap->N) { 3679 allcolumns = PETSC_TRUE; 3680 } else { 3681 allcolumns = PETSC_FALSE; 3682 } 3683 if (call == MAT_REUSE_MATRIX) { 3684 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3685 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3686 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3687 } else { 3688 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3689 } 3690 3691 /* 3692 m - number of local rows 3693 n - number of columns (same on all processors) 3694 rstart - first row in new global matrix generated 3695 */ 3696 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3697 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3698 if (call == MAT_INITIAL_MATRIX) { 3699 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3700 ii = aij->i; 3701 jj = aij->j; 3702 3703 /* 3704 Determine the number of non-zeros in the diagonal and off-diagonal 3705 portions of the matrix in order to do correct preallocation 3706 */ 3707 3708 /* first get start and end of "diagonal" columns */ 3709 if (csize == PETSC_DECIDE) { 3710 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3711 if (mglobal == n) { /* square matrix */ 3712 nlocal = m; 3713 } else { 3714 nlocal = n/size + ((n % size) > rank); 3715 } 3716 } else { 3717 nlocal = csize; 3718 } 3719 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3720 rstart = rend - nlocal; 3721 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3722 3723 /* next, compute all the lengths */ 3724 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3725 olens = dlens + m; 3726 for (i=0; i<m; i++) { 3727 jend = ii[i+1] - ii[i]; 3728 olen = 0; 3729 dlen = 0; 3730 for (j=0; j<jend; j++) { 3731 if (*jj < rstart || *jj >= rend) olen++; 3732 else dlen++; 3733 jj++; 3734 } 3735 olens[i] = olen; 3736 dlens[i] = dlen; 3737 } 3738 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3739 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3740 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3741 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3742 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3743 ierr = PetscFree(dlens);CHKERRQ(ierr); 3744 } else { 3745 PetscInt ml,nl; 3746 3747 M = *newmat; 3748 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3749 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3750 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3751 /* 3752 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3753     rather than the slower MatSetValues().
3754   */
3755     M->was_assembled = PETSC_TRUE;
3756     M->assembled     = PETSC_FALSE;
3757   }
3758   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3759   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3760   ii   = aij->i;
3761   jj   = aij->j;
3762   aa   = aij->a;
3763   for (i=0; i<m; i++) {
3764     row   = rstart + i;
3765     nz    = ii[i+1] - ii[i];
3766     cwork = jj; jj += nz;
3767     vwork = aa; aa += nz;
3768     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3769   }
3770
3771   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773   *newmat = M;
3774
3775   /* save submatrix used in processor for next request */
3776   if (call == MAT_INITIAL_MATRIX) {
3777     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3778     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3779   }
3780
3781   PetscFunctionReturn(0);
3782 }
3783
3784 EXTERN_C_BEGIN
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3787 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3788 {
3789   PetscInt       m,cstart, cend,j,nnz,i,d;
3790   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3791   const PetscInt *JJ;
3792   PetscScalar    *values;
3793   PetscErrorCode ierr;
3794
3795   PetscFunctionBegin;
3796   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3797
3798   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3799   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3800   m      = B->rmap->n;
3801   cstart = B->cmap->rstart;
3802   cend   = B->cmap->rend;
3803   rstart = B->rmap->rstart;
3804
3805   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3806
3807 #if defined(PETSC_USE_DEBUG)
3808   for (i=0; i<m; i++) {
3809     nnz = Ii[i+1]- Ii[i];
3810     JJ  = J + Ii[i];
3811     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3812     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3813     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3814   }
3815 #endif
3816
3817   for (i=0; i<m; i++) {
3818     nnz     = Ii[i+1]- Ii[i];
3819     JJ      = J + Ii[i];
3820     nnz_max = PetscMax(nnz_max,nnz);
3821     d       = 0;
3822     for (j=0; j<nnz; j++) {
3823       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3824     }
3825     d_nnz[i] = d;
3826     o_nnz[i] = nnz - d;
3827   }
3828   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3829   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3830
3831   if (v) values = (PetscScalar*)v;
3832   else {
3833     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3834     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3835   }
3836
3837   for (i=0; i<m; i++) {
3838     ii   = i + rstart;
3839     nnz  = Ii[i+1]- Ii[i];
3840     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3841 } 3842 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3844 3845 if (!v) { 3846 ierr = PetscFree(values);CHKERRQ(ierr); 3847 } 3848 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3849 PetscFunctionReturn(0); 3850 } 3851 EXTERN_C_END 3852 3853 #undef __FUNCT__ 3854 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3855 /*@ 3856 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3857 (the default parallel PETSc format). 3858 3859 Collective on MPI_Comm 3860 3861 Input Parameters: 3862 + B - the matrix 3863 . i - the indices into j for the start of each local row (starts with zero) 3864 . j - the column indices for each local row (starts with zero) 3865 - v - optional values in the matrix 3866 3867 Level: developer 3868 3869 Notes: 3870 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3871 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3872 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3873 3874 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3875 3876 The format which is used for the sparse matrix input, is equivalent to a 3877 row-major ordering.. i.e for the following matrix, the input data expected is 3878 as shown: 3879 3880 1 0 0 3881 2 0 3 P0 3882 ------- 3883 4 5 6 P1 3884 3885 Process0 [P0]: rows_owned=[0,1] 3886 i = {0,1,3} [size = nrow+1 = 2+1] 3887 j = {0,0,2} [size = nz = 6] 3888 v = {1,2,3} [size = nz = 6] 3889 3890 Process1 [P1]: rows_owned=[2] 3891 i = {0,3} [size = nrow+1 = 1+1] 3892 j = {0,1,2} [size = nz = 6] 3893 v = {4,5,6} [size = nz = 6] 3894 3895 .keywords: matrix, aij, compressed row, sparse, parallel 3896 3897 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3898 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3899 @*/ 3900 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3901 { 3902 PetscErrorCode ierr; 3903 3904 PetscFunctionBegin; 3905 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908 3909 #undef __FUNCT__ 3910 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3911 /*@C 3912 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3913 (the default parallel PETSc format). For good matrix assembly performance 3914 the user should preallocate the matrix storage by setting the parameters 3915 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3916 performance can be increased by more than a factor of 50. 3917 3918 Collective on MPI_Comm 3919 3920 Input Parameters: 3921 + A - the matrix 3922 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3923 (same value is used for all local rows) 3924 . d_nnz - array containing the number of nonzeros in the various rows of the 3925 DIAGONAL portion of the local submatrix (possibly different for each row) 3926 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3927 The size of this array is equal to the number of local rows, i.e 'm'. 
3928 For matrices that will be factored, you must leave room for (and set) 3929 the diagonal entry even if it is zero. 3930 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3931 submatrix (same value is used for all local rows). 3932 - o_nnz - array containing the number of nonzeros in the various rows of the 3933 OFF-DIAGONAL portion of the local submatrix (possibly different for 3934 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3935 structure. The size of this array is equal to the number 3936 of local rows, i.e 'm'. 3937 3938 If the *_nnz parameter is given then the *_nz parameter is ignored 3939 3940 The AIJ format (also called the Yale sparse matrix format or 3941 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3942 storage. The stored row and column indices begin with zero. 3943 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3944 3945 The parallel matrix is partitioned such that the first m0 rows belong to 3946 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3947 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3948 3949 The DIAGONAL portion of the local submatrix of a processor can be defined 3950 as the submatrix which is obtained by extraction the part corresponding to 3951 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3952 first row that belongs to the processor, r2 is the last row belonging to 3953 the this processor, and c1-c2 is range of indices of the local part of a 3954 vector suitable for applying the matrix to. This is an mxn matrix. In the 3955 common case of a square matrix, the row and column ranges are the same and 3956 the DIAGONAL part is also square. The remaining portion of the local 3957 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3958 3959 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3960 3961 You can call MatGetInfo() to get information on how effective the preallocation was; 3962 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3963 You can also run with the option -info and look for messages with the string 3964 malloc in them to see if additional memory allocation was needed. 3965 3966 Example usage: 3967 3968 Consider the following 8x8 matrix with 34 non-zero values, that is 3969 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3970 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3971 as follows: 3972 3973 .vb 3974 1 2 0 | 0 3 0 | 0 4 3975 Proc0 0 5 6 | 7 0 0 | 8 0 3976 9 0 10 | 11 0 0 | 12 0 3977 ------------------------------------- 3978 13 0 14 | 15 16 17 | 0 0 3979 Proc1 0 18 0 | 19 20 21 | 0 0 3980 0 0 0 | 22 23 0 | 24 0 3981 ------------------------------------- 3982 Proc2 25 26 27 | 0 0 28 | 29 0 3983 30 0 0 | 31 32 33 | 0 34 3984 .ve 3985 3986 This can be represented as a collection of submatrices as: 3987 3988 .vb 3989 A B C 3990 D E F 3991 G H I 3992 .ve 3993 3994 Where the submatrices A,B,C are owned by proc0, D,E,F are 3995 owned by proc1, G,H,I are owned by proc2. 3996 3997 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3998 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3999 The 'M','N' parameters are 8,8, and have the same values on all procs. 4000 4001 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4002 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 4003 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4004 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4005 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4006 matrix, ans [DF] as another SeqAIJ matrix. 4007 4008 When d_nz, o_nz parameters are specified, d_nz storage elements are 4009 allocated for every row of the local diagonal submatrix, and o_nz 4010 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4011 One way to choose d_nz and o_nz is to use the max nonzerors per local 4012 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4013 In this case, the values of d_nz,o_nz are: 4014 .vb 4015 proc0 : dnz = 2, o_nz = 2 4016 proc1 : dnz = 3, o_nz = 2 4017 proc2 : dnz = 1, o_nz = 4 4018 .ve 4019 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4020 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4021 for proc3. i.e we are using 12+15+10=37 storage locations to store 4022 34 values. 4023 4024 When d_nnz, o_nnz parameters are specified, the storage is specified 4025 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4026 In the above case the values for d_nnz,o_nnz are: 4027 .vb 4028 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4029 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4030 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4031 .ve 4032 Here the space allocated is sum of all the above values i.e 34, and 4033 hence pre-allocation is perfect. 4034 4035 Level: intermediate 4036 4037 .keywords: matrix, aij, compressed row, sparse, parallel 4038 4039 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4040 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4041 @*/ 4042 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4043 { 4044 PetscErrorCode ierr; 4045 4046 PetscFunctionBegin; 4047 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4048 PetscValidType(B,1); 4049 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4050 PetscFunctionReturn(0); 4051 } 4052 4053 #undef __FUNCT__ 4054 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4055 /*@ 4056 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4057 CSR format the local rows. 4058 4059 Collective on MPI_Comm 4060 4061 Input Parameters: 4062 + comm - MPI communicator 4063 . m - number of local rows (Cannot be PETSC_DECIDE) 4064 . n - This value should be the same as the local size used in creating the 4065 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4066 calculated if N is given) For square matrices n is almost always m. 4067 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4068 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4069 . i - row indices 4070 . j - column indices 4071 - a - matrix values 4072 4073 Output Parameter: 4074 . mat - the matrix 4075 4076 Level: intermediate 4077 4078 Notes: 4079 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4080 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4081 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
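
       A minimal calling sketch, with one process's arrays shown (every process passes only its
       own rows; the sizes here are illustrative and ierr is the usual PetscErrorCode variable).
       The index conventions are spelled out in the remainder of these Notes:

.vb
       PetscInt    i[] = {0,1,3};
       PetscInt    j[] = {0,0,2};
       PetscScalar a[] = {1.0,2.0,3.0};
       Mat         A;

       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve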
4082 4083 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4084 4085 The format which is used for the sparse matrix input, is equivalent to a 4086 row-major ordering.. i.e for the following matrix, the input data expected is 4087 as shown: 4088 4089 1 0 0 4090 2 0 3 P0 4091 ------- 4092 4 5 6 P1 4093 4094 Process0 [P0]: rows_owned=[0,1] 4095 i = {0,1,3} [size = nrow+1 = 2+1] 4096 j = {0,0,2} [size = nz = 6] 4097 v = {1,2,3} [size = nz = 6] 4098 4099 Process1 [P1]: rows_owned=[2] 4100 i = {0,3} [size = nrow+1 = 1+1] 4101 j = {0,1,2} [size = nz = 6] 4102 v = {4,5,6} [size = nz = 6] 4103 4104 .keywords: matrix, aij, compressed row, sparse, parallel 4105 4106 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4107 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4108 @*/ 4109 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4110 { 4111 PetscErrorCode ierr; 4112 4113 PetscFunctionBegin; 4114 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4115 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4116 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4117 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4118 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4119 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4120 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4121 PetscFunctionReturn(0); 4122 } 4123 4124 #undef __FUNCT__ 4125 #define __FUNCT__ "MatCreateAIJ" 4126 /*@C 4127 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4128 (the default parallel PETSc format). For good matrix assembly performance 4129 the user should preallocate the matrix storage by setting the parameters 4130 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4131 performance can be increased by more than a factor of 50. 4132 4133 Collective on MPI_Comm 4134 4135 Input Parameters: 4136 + comm - MPI communicator 4137 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4138 This value should be the same as the local size used in creating the 4139 y vector for the matrix-vector product y = Ax. 4140 . n - This value should be the same as the local size used in creating the 4141 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4142 calculated if N is given) For square matrices n is almost always m. 4143 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4144 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4145 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4146 (same value is used for all local rows) 4147 . d_nnz - array containing the number of nonzeros in the various rows of the 4148 DIAGONAL portion of the local submatrix (possibly different for each row) 4149 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4150 The size of this array is equal to the number of local rows, i.e 'm'. 4151 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4152 submatrix (same value is used for all local rows). 
4153 - o_nnz - array containing the number of nonzeros in the various rows of the 4154 OFF-DIAGONAL portion of the local submatrix (possibly different for 4155 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4156 structure. The size of this array is equal to the number 4157 of local rows, i.e 'm'. 4158 4159 Output Parameter: 4160 . A - the matrix 4161 4162 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4163 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4164 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4165 4166 Notes: 4167 If the *_nnz parameter is given then the *_nz parameter is ignored 4168 4169 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4170 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4171 storage requirements for this matrix. 4172 4173 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4174 processor than it must be used on all processors that share the object for 4175 that argument. 4176 4177 The user MUST specify either the local or global matrix dimensions 4178 (possibly both). 4179 4180 The parallel matrix is partitioned across processors such that the 4181 first m0 rows belong to process 0, the next m1 rows belong to 4182 process 1, the next m2 rows belong to process 2 etc.. where 4183 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4184 values corresponding to [m x N] submatrix. 4185 4186 The columns are logically partitioned with the n0 columns belonging 4187 to 0th partition, the next n1 columns belonging to the next 4188 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4189 4190 The DIAGONAL portion of the local submatrix on any given processor 4191 is the submatrix corresponding to the rows and columns m,n 4192 corresponding to the given processor. i.e diagonal matrix on 4193 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4194 etc. The remaining portion of the local submatrix [m x (N-n)] 4195 constitute the OFF-DIAGONAL portion. The example below better 4196 illustrates this concept. 4197 4198 For a square global matrix we define each processor's diagonal portion 4199 to be its local rows and the corresponding columns (a square submatrix); 4200 each processor's off-diagonal portion encompasses the remainder of the 4201 local matrix (a rectangular submatrix). 4202 4203 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4204 4205 When calling this routine with a single process communicator, a matrix of 4206 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4207 type of communicator, use the construction mechanism: 4208 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4209 4210 By default, this format uses inodes (identical nodes) when possible. 4211 We search for consecutive rows with the same nonzero structure, thereby 4212 reusing matrix information to achieve increased efficiency. 4213 4214 Options Database Keys: 4215 + -mat_no_inode - Do not use inodes 4216 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4217 - -mat_aij_oneindex - Internally use indexing starting at 1 4218 rather than 0. Note that when calling MatSetValues(), 4219 the user still MUST index entries starting at 0! 4220 4221 4222 Example usage: 4223 4224 Consider the following 8x8 matrix with 34 non-zero values, that is 4225 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4226 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4227 as follows: 4228 4229 .vb 4230 1 2 0 | 0 3 0 | 0 4 4231 Proc0 0 5 6 | 7 0 0 | 8 0 4232 9 0 10 | 11 0 0 | 12 0 4233 ------------------------------------- 4234 13 0 14 | 15 16 17 | 0 0 4235 Proc1 0 18 0 | 19 20 21 | 0 0 4236 0 0 0 | 22 23 0 | 24 0 4237 ------------------------------------- 4238 Proc2 25 26 27 | 0 0 28 | 29 0 4239 30 0 0 | 31 32 33 | 0 34 4240 .ve 4241 4242 This can be represented as a collection of submatrices as: 4243 4244 .vb 4245 A B C 4246 D E F 4247 G H I 4248 .ve 4249 4250 Where the submatrices A,B,C are owned by proc0, D,E,F are 4251 owned by proc1, G,H,I are owned by proc2. 4252 4253 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4254 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4255 The 'M','N' parameters are 8,8, and have the same values on all procs. 4256 4257 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4258 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4259 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4260 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4261 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4262 matrix, ans [DF] as another SeqAIJ matrix. 4263 4264 When d_nz, o_nz parameters are specified, d_nz storage elements are 4265 allocated for every row of the local diagonal submatrix, and o_nz 4266 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4267 One way to choose d_nz and o_nz is to use the max nonzerors per local 4268 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4269 In this case, the values of d_nz,o_nz are: 4270 .vb 4271 proc0 : dnz = 2, o_nz = 2 4272 proc1 : dnz = 3, o_nz = 2 4273 proc2 : dnz = 1, o_nz = 4 4274 .ve 4275 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4276 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4277 for proc3. i.e we are using 12+15+10=37 storage locations to store 4278 34 values. 4279 4280 When d_nnz, o_nnz parameters are specified, the storage is specified 4281 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4282 In the above case the values for d_nnz,o_nnz are: 4283 .vb 4284 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4285 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4286 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4287 .ve 4288 Here the space allocated is sum of all the above values i.e 34, and 4289 hence pre-allocation is perfect. 
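
       A sketch of the corresponding call on proc0 (each process passes its own local sizes and
       nnz arrays; comm is the parallel communicator and error checking is omitted). Because the
       d_nnz/o_nnz arrays are supplied, the scalar d_nz/o_nz arguments are ignored and 0 is
       passed for them:

.vb
       Mat      A;
       PetscInt m = 3, n = 3, M = 8, N = 8;
       PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

       MatCreateAIJ(comm,m,n,M,N,0,d_nnz,0,o_nnz,&A);
.ve

       followed by the usual MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() calls.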
4290 4291 Level: intermediate 4292 4293 .keywords: matrix, aij, compressed row, sparse, parallel 4294 4295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4296 MPIAIJ, MatCreateMPIAIJWithArrays() 4297 @*/ 4298 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4299 { 4300 PetscErrorCode ierr; 4301 PetscMPIInt size; 4302 4303 PetscFunctionBegin; 4304 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4305 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4306 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4307 if (size > 1) { 4308 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4309 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4310 } else { 4311 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4312 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4313 } 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4319 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4320 { 4321 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4322 4323 PetscFunctionBegin; 4324 *Ad = a->A; 4325 *Ao = a->B; 4326 *colmap = a->garray; 4327 PetscFunctionReturn(0); 4328 } 4329 4330 #undef __FUNCT__ 4331 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4332 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4333 { 4334 PetscErrorCode ierr; 4335 PetscInt i; 4336 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4337 4338 PetscFunctionBegin; 4339 if (coloring->ctype == IS_COLORING_GLOBAL) { 4340 ISColoringValue *allcolors,*colors; 4341 ISColoring ocoloring; 4342 4343 /* set coloring for diagonal portion */ 4344 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4345 4346 /* set coloring for off-diagonal portion */ 4347 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4348 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4349 for (i=0; i<a->B->cmap->n; i++) { 4350 colors[i] = allcolors[a->garray[i]]; 4351 } 4352 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4353 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4354 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4355 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4356 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4357 ISColoringValue *colors; 4358 PetscInt *larray; 4359 ISColoring ocoloring; 4360 4361 /* set coloring for diagonal portion */ 4362 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4363 for (i=0; i<a->A->cmap->n; i++) { 4364 larray[i] = i + A->cmap->rstart; 4365 } 4366 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4367 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4368 for (i=0; i<a->A->cmap->n; i++) { 4369 colors[i] = coloring->colors[larray[i]]; 4370 } 4371 ierr = PetscFree(larray);CHKERRQ(ierr); 4372 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4373 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4374 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4375 4376 /* set coloring for off-diagonal portion */ 4377 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4378 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4379 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4380 for (i=0; i<a->B->cmap->n; i++) { 4381 colors[i] = coloring->colors[larray[i]]; 4382 } 4383 ierr = PetscFree(larray);CHKERRQ(ierr); 4384 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4385 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4386 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4387 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4388 4389 PetscFunctionReturn(0); 4390 } 4391 4392 #undef __FUNCT__ 4393 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4394 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4395 { 4396 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4397 PetscErrorCode ierr; 4398 4399 PetscFunctionBegin; 4400 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4401 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4402 PetscFunctionReturn(0); 4403 } 4404 4405 #undef __FUNCT__ 4406 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4407 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4408 { 4409 PetscErrorCode ierr; 4410 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4411 PetscInt *indx; 4412 4413 PetscFunctionBegin; 4414 /* This routine will ONLY return MPIAIJ type matrix */ 4415 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4416 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4417 if (n == PETSC_DECIDE) { 4418 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4419 } 4420 /* Check sum(n) = N */ 4421 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4422 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4423 4424 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4425 rstart -= m; 4426 4427 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4428 for (i=0;i<m;i++) { 4429 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4430 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4431 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4432 } 4433 4434 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4435 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4436 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4437 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4438 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4439 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4440 PetscFunctionReturn(0); 4441 } 4442 4443 #undef __FUNCT__ 4444 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4445 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4446 { 4447 PetscErrorCode ierr; 4448 PetscInt m,N,i,rstart,nnz,Ii; 4449 PetscInt *indx; 4450 PetscScalar *values; 4451 4452 PetscFunctionBegin; 4453 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4454 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4455 for (i=0;i<m;i++) { 4456 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4457 Ii = i + rstart; 4458 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4459 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4460 } 4461 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4463 PetscFunctionReturn(0); 4464 } 4465 4466 #undef __FUNCT__ 4467 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4468 /*@ 4469 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4470 matrices from each processor 4471 4472 Collective on MPI_Comm 4473 4474 Input Parameters: 4475 + comm - the communicator the parallel matrix will live on 4476 . inmat - the input sequential matrix on each process 4477 . n - number of local columns (or PETSC_DECIDE) 4478 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4479 4480 Output Parameter: 4481 . outmat - the parallel matrix generated 4482 4483 Level: advanced 4484 4485 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4486 4487 @*/ 4488 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4489 { 4490 PetscErrorCode ierr; 4491 4492 PetscFunctionBegin; 4493 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4494 if (scall == MAT_INITIAL_MATRIX) { 4495 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4496 } 4497 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4498 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4499 PetscFunctionReturn(0); 4500 } 4501 4502 #undef __FUNCT__ 4503 #define __FUNCT__ "MatFileSplit" 4504 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4505 { 4506 PetscErrorCode ierr; 4507 PetscMPIInt rank; 4508 PetscInt m,N,i,rstart,nnz; 4509 size_t len; 4510 const PetscInt *indx; 4511 PetscViewer out; 4512 char *name; 4513 Mat B; 4514 const PetscScalar *values; 4515 4516 PetscFunctionBegin; 4517 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4518 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4519 /* Should this be the type of the diagonal block of A? 
*/ 4520 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4521 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4522 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4523 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4524 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4525 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4526 for (i=0;i<m;i++) { 4527 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4528 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4529 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4530 } 4531 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4532 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4533 4534 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4535 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4536 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4537 sprintf(name,"%s.%d",outfile,rank); 4538 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4539 ierr = PetscFree(name);CHKERRQ(ierr); 4540 ierr = MatView(B,out);CHKERRQ(ierr); 4541 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4542 ierr = MatDestroy(&B);CHKERRQ(ierr); 4543 PetscFunctionReturn(0); 4544 } 4545 4546 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4547 #undef __FUNCT__ 4548 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4549 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4550 { 4551 PetscErrorCode ierr; 4552 Mat_Merge_SeqsToMPI *merge; 4553 PetscContainer container; 4554 4555 PetscFunctionBegin; 4556 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4557 if (container) { 4558 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4560 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4561 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4564 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4565 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4566 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4567 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4568 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4571 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4572 ierr = PetscFree(merge);CHKERRQ(ierr); 4573 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4574 } 4575 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 #include <../src/mat/utils/freespace.h> 4580 #include <petscbt.h> 4581 4582 #undef __FUNCT__ 4583 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4584 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4585 { 4586 PetscErrorCode ierr; 4587 MPI_Comm comm=((PetscObject)mpimat)->comm; 4588 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4589 PetscMPIInt size,rank,taga,*len_s; 4590 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4591 PetscInt proc,m; 4592 PetscInt **buf_ri,**buf_rj; 4593 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4594 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4595 MPI_Request *s_waits,*r_waits; 4596 MPI_Status *status; 4597 MatScalar *aa=a->a; 4598 MatScalar **abuf_r,*ba_i; 4599 Mat_Merge_SeqsToMPI *merge; 4600 PetscContainer container; 4601 4602 PetscFunctionBegin; 4603 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4604 4605 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4606 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4607 4608 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4609 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4610 4611 bi = merge->bi; 4612 bj = merge->bj; 4613 buf_ri = merge->buf_ri; 4614 buf_rj = merge->buf_rj; 4615 4616 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4617 owners = merge->rowmap->range; 4618 len_s = merge->len_s; 4619 4620 /* send and recv matrix values */ 4621 /*-----------------------------*/ 4622 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4623 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4624 4625 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4626 for (proc=0,k=0; proc<size; proc++) { 4627 if (!len_s[proc]) continue; 4628 i = owners[proc]; 4629 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4630 k++; 4631 } 4632 4633 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4634 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4635 ierr = PetscFree(status);CHKERRQ(ierr); 4636 4637 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4638 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4639 4640 /* insert mat values of mpimat */ 4641 /*----------------------------*/ 4642 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4643 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4644 4645 for (k=0; k<merge->nrecv; k++) { 4646 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4647 nrows = *(buf_ri_k[k]); 4648 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4649 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4650 } 4651 4652 /* set values of ba */ 4653 m = merge->rowmap->n; 4654 for (i=0; i<m; i++) { 4655 arow = owners[rank] + i; 4656 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4657 bnzi = bi[i+1] - bi[i]; 4658 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4659 4660 /* add local non-zero vals of this proc's seqmat into ba */ 4661 anzi = ai[arow+1] - ai[arow]; 4662 aj = a->j + ai[arow]; 4663 aa = a->a + ai[arow]; 4664 nextaj = 0; 4665 for (j=0; nextaj<anzi; j++) { 4666 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4667 ba_i[j] += aa[nextaj++]; 4668 } 4669 } 4670 4671 /* add received vals into ba */ 4672 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4673 /* i-th row */ 4674 if (i == *nextrow[k]) { 4675 anzi = *(nextai[k]+1) - *nextai[k]; 4676 aj = buf_rj[k] + *(nextai[k]); 4677 aa = abuf_r[k] + *(nextai[k]); 4678 nextaj = 0; 4679 for (j=0; nextaj<anzi; j++) { 4680 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4681 ba_i[j] += aa[nextaj++]; 4682 } 4683 } 4684 nextrow[k]++; nextai[k]++; 4685 } 4686 } 4687 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4688 } 4689 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4690 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4691 4692 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4693 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4694 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4695 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4696 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4697 PetscFunctionReturn(0); 4698 } 4699 4700 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4701 4702 #undef __FUNCT__ 4703 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4704 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4705 { 4706 PetscErrorCode ierr; 4707 Mat B_mpi; 4708 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4709 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4710 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4711 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4712 PetscInt len,proc,*dnz,*onz,bs,cbs; 4713 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4714 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4715 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4716 MPI_Status *status; 4717 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4718 PetscBT lnkbt; 4719 Mat_Merge_SeqsToMPI *merge; 4720 PetscContainer container; 4721 4722 PetscFunctionBegin; 4723 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4724 4725 /* make sure it is a PETSc comm */ 4726 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4727 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4728 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4729 4730 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4731 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4732 4733 /* determine row ownership */ 4734 /*---------------------------------------------------------*/ 4735 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4736 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4737 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4738 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4739 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4740 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4741 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4742 4743 m = merge->rowmap->n; 4744 owners = merge->rowmap->range; 4745 4746 /* determine the number of messages to send, their lengths */ 4747 /*---------------------------------------------------------*/ 4748 len_s = merge->len_s; 4749 4750 len = 0; /* length of buf_si[] */ 4751 merge->nsend = 0; 4752 for (proc=0; proc<size; proc++) { 4753 len_si[proc] = 0; 4754 if (proc == rank) { 4755 len_s[proc] = 0; 4756 } else { 4757 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4758 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */ 4759 } 4760 if (len_s[proc]) { 4761 merge->nsend++; 4762 nrows = 0; 4763 for (i=owners[proc]; i<owners[proc+1]; i++) { 4764 if (ai[i+1] > ai[i]) nrows++; 4765 } 4766 len_si[proc] = 2*(nrows+1); 4767 len += len_si[proc]; 4768 } 4769 } 4770 4771 /* determine the number and length of messages to receive for ij-structure */ 4772 /*-------------------------------------------------------------------------*/ 4773 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4774 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4775 4776 /* post the Irecv of j-structure */ 4777 /*-------------------------------*/ 4778 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4779 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4780 4781 /* post the Isend of j-structure */ 4782 /*--------------------------------*/ 4783 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4784 4785 for (proc=0, k=0; proc<size; proc++) { 4786 if (!len_s[proc]) continue; 4787 i = owners[proc]; 4788 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4789 k++; 4790 } 4791 4792 /* receives and sends of j-structure are complete */ 4793 /*------------------------------------------------*/ 4794 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4795 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4796 4797 /* send and recv i-structure */ 4798 /*---------------------------*/ 4799 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4800 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4801 4802 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4803 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4804 for (proc=0,k=0; proc<size; proc++) { 4805 if (!len_s[proc]) continue; 4806 /* form outgoing message for i-structure: 4807 buf_si[0]: nrows to be sent 4808 [1:nrows]: row index (local) 4809 [nrows+1:2*nrows+1]: i-structure index 4810 */ 4811 /*-------------------------------------------*/ 4812 nrows = len_si[proc]/2 - 1; 4813 buf_si_i = buf_si + nrows+1; 4814 buf_si[0] = nrows; 4815 buf_si_i[0] = 0; 4816 nrows = 0; 4817 for (i=owners[proc]; i<owners[proc+1]; i++) { 4818 anzi = ai[i+1] - ai[i]; 4819 if (anzi) { 4820 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4821 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4822 nrows++; 4823 } 4824 } 4825 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4826 k++; 4827 buf_si += len_si[proc]; 4828 } 4829 4830 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4831 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4832 4833 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4834 for (i=0; i<merge->nrecv; i++) { 4835 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4836 } 4837 4838 ierr = PetscFree(len_si);CHKERRQ(ierr); 4839 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4840 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4841 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4842 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4843 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4844 ierr = PetscFree(status);CHKERRQ(ierr); 4845 4846 /* compute a local seq matrix in each processor */ 4847 /*----------------------------------------------*/ 4848 /* allocate bi array and free space for accumulating nonzero column info */ 4849 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4850 bi[0] = 0; 4851 4852 /* create and initialize a linked list */ 4853 nlnk = N+1; 4854 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4855 4856 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4857 len = ai[owners[rank+1]] - ai[owners[rank]]; 4858 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4859 current_space = free_space; 4860 4861 /* determine symbolic info 
for each local row */ 4862 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4863 4864 for (k=0; k<merge->nrecv; k++) { 4865 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4866 nrows = *buf_ri_k[k]; 4867 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4868 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4869 } 4870 4871 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4872 len = 0; 4873 for (i=0;i<m;i++) { 4874 bnzi = 0; 4875 /* add local non-zero cols of this proc's seqmat into lnk */ 4876 arow = owners[rank] + i; 4877 anzi = ai[arow+1] - ai[arow]; 4878 aj = a->j + ai[arow]; 4879 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4880 bnzi += nlnk; 4881 /* add received col data into lnk */ 4882 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4883 if (i == *nextrow[k]) { /* i-th row */ 4884 anzi = *(nextai[k]+1) - *nextai[k]; 4885 aj = buf_rj[k] + *nextai[k]; 4886 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4887 bnzi += nlnk; 4888 nextrow[k]++; nextai[k]++; 4889 } 4890 } 4891 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4892 4893 /* if free space is not available, make more free space */ 4894 if (current_space->local_remaining<bnzi) { 4895 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4896 nspacedouble++; 4897 } 4898 /* copy data into free space, then initialize lnk */ 4899 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4900 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4901 4902 current_space->array += bnzi; 4903 current_space->local_used += bnzi; 4904 current_space->local_remaining -= bnzi; 4905 4906 bi[i+1] = bi[i] + bnzi; 4907 } 4908 4909 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4910 4911 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4912 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4913 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4914 4915 /* create symbolic parallel matrix B_mpi */ 4916 /*---------------------------------------*/ 4917 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4918 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4919 if (n==PETSC_DECIDE) { 4920 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4921 } else { 4922 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4923 } 4924 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4925 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4926 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4927 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4928 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4929 4930 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4931 B_mpi->assembled = PETSC_FALSE; 4932 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4933 merge->bi = bi; 4934 merge->bj = bj; 4935 merge->buf_ri = buf_ri; 4936 merge->buf_rj = buf_rj; 4937 merge->coi = PETSC_NULL; 4938 merge->coj = PETSC_NULL; 4939 merge->owners_co = PETSC_NULL; 4940 4941 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4942 4943 /* attach the supporting struct to B_mpi for reuse */ 4944 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4945 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4946 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4947 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4948 *mpimat = B_mpi; 4949 4950 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4951 PetscFunctionReturn(0); 4952 } 4953 4954 #undef __FUNCT__ 4955 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4956 /*@C 4957 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4958 matrices from each processor 4959 4960 Collective on MPI_Comm 4961 4962 Input Parameters: 4963 + comm - the communicator the parallel matrix will live on 4964 . seqmat - the input sequential matrix on each process 4965 . m - number of local rows (or PETSC_DECIDE) 4966 . n - number of local columns (or PETSC_DECIDE) 4967 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4968 4969 Output Parameter: 4970 . mpimat - the parallel matrix generated 4971 4972 Level: advanced 4973 4974 Notes: 4975 The dimensions of the sequential matrix in each processor MUST be the same. 4976 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4977 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4978 @*/ 4979 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4980 { 4981 PetscErrorCode ierr; 4982 PetscMPIInt size; 4983 4984 PetscFunctionBegin; 4985 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4986 if (size == 1) { 4987 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4988 if (scall == MAT_INITIAL_MATRIX) { 4989 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4990 } else { 4991 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4992 } 4993 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4994 PetscFunctionReturn(0); 4995 } 4996 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4997 if (scall == MAT_INITIAL_MATRIX) { 4998 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4999 } 5000 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5001 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5002 PetscFunctionReturn(0); 5003 } 5004 5005 #undef __FUNCT__ 5006 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5007 /*@ 5008 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5009 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5010 with MatGetSize() 5011 5012 Not Collective 5013 5014 Input Parameters: 5015 + A - the matrix 5016 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5017 5018 Output Parameter: 5019 . 
A_loc - the local sequential matrix generated 5020 5021 Level: developer 5022 5023 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5024 5025 @*/ 5026 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5027 { 5028 PetscErrorCode ierr; 5029 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5030 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5031 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5032 MatScalar *aa=a->a,*ba=b->a,*cam; 5033 PetscScalar *ca; 5034 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5035 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5036 PetscBool match; 5037 5038 PetscFunctionBegin; 5039 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5040 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5041 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5042 if (scall == MAT_INITIAL_MATRIX) { 5043 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5044 ci[0] = 0; 5045 for (i=0; i<am; i++) { 5046 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5047 } 5048 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5049 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5050 k = 0; 5051 for (i=0; i<am; i++) { 5052 ncols_o = bi[i+1] - bi[i]; 5053 ncols_d = ai[i+1] - ai[i]; 5054 /* off-diagonal portion of A */ 5055 for (jo=0; jo<ncols_o; jo++) { 5056 col = cmap[*bj]; 5057 if (col >= cstart) break; 5058 cj[k] = col; bj++; 5059 ca[k++] = *ba++; 5060 } 5061 /* diagonal portion of A */ 5062 for (j=0; j<ncols_d; j++) { 5063 cj[k] = cstart + *aj++; 5064 ca[k++] = *aa++; 5065 } 5066 /* off-diagonal portion of A */ 5067 for (j=jo; j<ncols_o; j++) { 5068 cj[k] = cmap[*bj++]; 5069 ca[k++] = *ba++; 5070 } 5071 } 5072 /* put together the new matrix */ 5073 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5074 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5075 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5076 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5077 mat->free_a = PETSC_TRUE; 5078 mat->free_ij = PETSC_TRUE; 5079 mat->nonew = 0; 5080 } else if (scall == MAT_REUSE_MATRIX) { 5081 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5082 ci = mat->i; cj = mat->j; cam = mat->a; 5083 for (i=0; i<am; i++) { 5084 /* off-diagonal portion of A */ 5085 ncols_o = bi[i+1] - bi[i]; 5086 for (jo=0; jo<ncols_o; jo++) { 5087 col = cmap[*bj]; 5088 if (col >= cstart) break; 5089 *cam++ = *ba++; bj++; 5090 } 5091 /* diagonal portion of A */ 5092 ncols_d = ai[i+1] - ai[i]; 5093 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5094 /* off-diagonal portion of A */ 5095 for (j=jo; j<ncols_o; j++) { 5096 *cam++ = *ba++; bj++; 5097 } 5098 } 5099 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5100 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5101 PetscFunctionReturn(0); 5102 } 5103 5104 #undef __FUNCT__ 5105 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5106 /*@C 5107 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5108 5109 Not Collective 5110 5111 Input Parameters: 5112 + A - the matrix 5113 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5114 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5115 5116 Output Parameter: 5117 . 
A_loc - the local sequential matrix generated 5118 5119 Level: developer 5120 5121 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5122 5123 @*/ 5124 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5125 { 5126 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5127 PetscErrorCode ierr; 5128 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5129 IS isrowa,iscola; 5130 Mat *aloc; 5131 PetscBool match; 5132 5133 PetscFunctionBegin; 5134 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5135 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5136 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5137 if (!row) { 5138 start = A->rmap->rstart; end = A->rmap->rend; 5139 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5140 } else { 5141 isrowa = *row; 5142 } 5143 if (!col) { 5144 start = A->cmap->rstart; 5145 cmap = a->garray; 5146 nzA = a->A->cmap->n; 5147 nzB = a->B->cmap->n; 5148 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5149 ncols = 0; 5150 for (i=0; i<nzB; i++) { 5151 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5152 else break; 5153 } 5154 imark = i; 5155 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5156 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5157 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5158 } else { 5159 iscola = *col; 5160 } 5161 if (scall != MAT_INITIAL_MATRIX) { 5162 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5163 aloc[0] = *A_loc; 5164 } 5165 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5166 *A_loc = aloc[0]; 5167 ierr = PetscFree(aloc);CHKERRQ(ierr); 5168 if (!row) { 5169 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5170 } 5171 if (!col) { 5172 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5173 } 5174 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5175 PetscFunctionReturn(0); 5176 } 5177 5178 #undef __FUNCT__ 5179 #define __FUNCT__ "MatGetBrowsOfAcols" 5180 /*@C 5181 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5182 5183 Collective on Mat 5184 5185 Input Parameters: 5186 + A,B - the matrices in mpiaij format 5187 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5188 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5189 5190 Output Parameter: 5191 + rowb, colb - index sets of rows and columns of B to extract 5192 - B_seq - the sequential matrix generated 5193 5194 Level: developer 5195 5196 @*/ 5197 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5198 { 5199 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5200 PetscErrorCode ierr; 5201 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5202 IS isrowb,iscolb; 5203 Mat *bseq=PETSC_NULL; 5204 5205 PetscFunctionBegin; 5206 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5207 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5208 } 5209 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5210 5211 if (scall == MAT_INITIAL_MATRIX) { 5212 start = A->cmap->rstart; 5213 cmap = a->garray; 5214 nzA = a->A->cmap->n; 5215 nzB = a->B->cmap->n; 5216 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5217 ncols = 0; 5218 for (i=0; i<nzB; i++) { /* row < local row index */ 5219 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5220 else break; 5221 } 5222 imark = i; 5223 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5224 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5225 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5226 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5227 } else { 5228 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5229 isrowb = *rowb; iscolb = *colb; 5230 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5231 bseq[0] = *B_seq; 5232 } 5233 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5234 *B_seq = bseq[0]; 5235 ierr = PetscFree(bseq);CHKERRQ(ierr); 5236 if (!rowb) { 5237 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5238 } else { 5239 *rowb = isrowb; 5240 } 5241 if (!colb) { 5242 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5243 } else { 5244 *colb = iscolb; 5245 } 5246 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5247 PetscFunctionReturn(0); 5248 } 5249 5250 #undef __FUNCT__ 5251 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5252 /* 5253 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5254 of the OFF-DIAGONAL portion of local A 5255 5256 Collective on Mat 5257 5258 Input Parameters: 5259 + A,B - the matrices in mpiaij format 5260 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5261 5262 Output Parameter: 5263 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5264 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5265 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5266 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5267 5268 Level: developer 5269 5270 */ 5271 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5272 { 5273 VecScatter_MPI_General *gen_to,*gen_from; 5274 PetscErrorCode ierr; 5275 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5276 Mat_SeqAIJ *b_oth; 5277 VecScatter ctx=a->Mvctx; 5278 MPI_Comm comm=((PetscObject)ctx)->comm; 5279 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5280 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5281 PetscScalar *rvalues,*svalues; 5282 MatScalar *b_otha,*bufa,*bufA; 5283 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5284 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5285 MPI_Status *sstatus,rstatus; 5286 PetscMPIInt jj; 5287 PetscInt *cols,sbs,rbs; 5288 PetscScalar *vals; 5289 5290 PetscFunctionBegin; 5291 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5292 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5293 } 5294 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5295 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5296 5297 gen_to = (VecScatter_MPI_General*)ctx->todata; 5298 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5299 rvalues = gen_from->values; /* holds the length of receiving row */ 5300 svalues = gen_to->values; /* holds the length of sending row */ 5301 nrecvs = gen_from->n; 5302 nsends = gen_to->n; 5303 5304 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5305 srow = gen_to->indices; /* local row index to be sent */ 5306 sstarts = gen_to->starts; 5307 sprocs = gen_to->procs; 5308 sstatus = gen_to->sstatus; 5309 sbs = gen_to->bs; 5310 rstarts = gen_from->starts; 5311 rprocs = gen_from->procs; 5312 rbs = gen_from->bs; 5313 5314 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5315 if (scall == MAT_INITIAL_MATRIX) { 5316 /* i-array */ 5317 /*---------*/ 5318 /* post receives */ 5319 for (i=0; i<nrecvs; i++) { 5320 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5321 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5322 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5323 } 5324 5325 /* pack the outgoing message */ 5326 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5327 sstartsj[0] = 0; rstartsj[0] = 0; 5328 len = 0; /* total length of j or a array to be sent */ 5329 k = 0; 5330 for (i=0; i<nsends; i++) { 5331 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5332 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5333 for (j=0; j<nrows; j++) { 5334 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5335 for (l=0; l<sbs; l++) { 5336 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5337 rowlen[j*sbs+l] = ncols; 5338 len += ncols; 5339 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5340 } 5341 k++; 5342 } 5343 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5344 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5345 } 5346 
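/* All row-length (i-array) messages are now posted: each destination process will learn, row by row, how many nonzeros of B to expect, and sstartsj[] records where each outgoing j- and a-message will start in bufj and bufa below. (Descriptive comment added for clarity; it restates what the loop above computes.) */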
/* recvs and sends of i-array are completed */ 5347 i = nrecvs; 5348 while (i--) { 5349 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5350 } 5351 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5352 5353 /* allocate buffers for sending j and a arrays */ 5354 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5355 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5356 5357 /* create i-array of B_oth */ 5358 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5359 b_othi[0] = 0; 5360 len = 0; /* total length of j or a array to be received */ 5361 k = 0; 5362 for (i=0; i<nrecvs; i++) { 5363 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5364 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5365 for (j=0; j<nrows; j++) { 5366 b_othi[k+1] = b_othi[k] + rowlen[j]; 5367 len += rowlen[j]; k++; 5368 } 5369 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5370 } 5371 5372 /* allocate space for j and a arrays of B_oth */ 5373 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5374 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5375 5376 /* j-array */ 5377 /*---------*/ 5378 /* post receives of j-array */ 5379 for (i=0; i<nrecvs; i++) { 5380 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5381 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5382 } 5383 5384 /* pack the outgoing message j-array */ 5385 k = 0; 5386 for (i=0; i<nsends; i++) { 5387 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5388 bufJ = bufj+sstartsj[i]; 5389 for (j=0; j<nrows; j++) { 5390 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5391 for (ll=0; ll<sbs; ll++) { 5392 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5393 for (l=0; l<ncols; l++) { 5394 *bufJ++ = cols[l]; 5395 } 5396 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5397 } 5398 } 5399 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5400 } 5401 5402 /* recvs and sends of j-array are completed */ 5403 i = nrecvs; 5404 while (i--) { 5405 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5406 } 5407 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5408 } else if (scall == MAT_REUSE_MATRIX) { 5409 sstartsj = *startsj_s; 5410 rstartsj = *startsj_r; 5411 bufa = *bufa_ptr; 5412 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5413 b_otha = b_oth->a; 5414 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5415 5416 /* a-array */ 5417 /*---------*/ 5418 /* post receives of a-array */ 5419 for (i=0; i<nrecvs; i++) { 5420 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5421 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5422 } 5423 5424 /* pack the outgoing message a-array */ 5425 k = 0; 5426 for (i=0; i<nsends; i++) { 5427 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5428 bufA = bufa+sstartsj[i]; 5429 for (j=0; j<nrows; j++) { 5430 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5431 for (ll=0; ll<sbs; ll++) { 5432 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5433 for (l=0; l<ncols; l++) { 5434 *bufA++ = vals[l]; 5435 } 5436 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5437 } 5438 } 5439 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5440 } 5441 /* recvs and sends of a-array are completed */ 5442 i = nrecvs; 5443 while (i--) { 5444 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5445 } 5446 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5447 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5448 5449 if (scall == MAT_INITIAL_MATRIX) { 5450 /* put together the new matrix */ 5451 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5452 5453 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5454 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5455 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5456 b_oth->free_a = PETSC_TRUE; 5457 b_oth->free_ij = PETSC_TRUE; 5458 b_oth->nonew = 0; 5459 5460 ierr = PetscFree(bufj);CHKERRQ(ierr); 5461 if (!startsj_s || !bufa_ptr) { 5462 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5463 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5464 } else { 5465 *startsj_s = sstartsj; 5466 *startsj_r = rstartsj; 5467 *bufa_ptr = bufa; 5468 } 5469 } 5470 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5471 PetscFunctionReturn(0); 5472 } 5473 5474 #undef __FUNCT__ 5475 #define __FUNCT__ "MatGetCommunicationStructs" 5476 /*@C 5477 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5478 5479 Not Collective 5480 5481 Input Parameters: 5482 . A - The matrix in mpiaij format 5483 5484 Output Parameter: 5485 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5486 . 
colmap - A map from global column index to local index into lvec 5487 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5488 5489 Level: developer 5490 5491 @*/ 5492 #if defined (PETSC_USE_CTABLE) 5493 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5494 #else 5495 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5496 #endif 5497 { 5498 Mat_MPIAIJ *a; 5499 5500 PetscFunctionBegin; 5501 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5502 PetscValidPointer(lvec, 2); 5503 PetscValidPointer(colmap, 3); 5504 PetscValidPointer(multScatter, 4); 5505 a = (Mat_MPIAIJ *) A->data; 5506 if (lvec) *lvec = a->lvec; 5507 if (colmap) *colmap = a->colmap; 5508 if (multScatter) *multScatter = a->Mvctx; 5509 PetscFunctionReturn(0); 5510 } 5511 5512 EXTERN_C_BEGIN 5513 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5514 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5515 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5516 EXTERN_C_END 5517 5518 #undef __FUNCT__ 5519 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5520 /* 5521 Computes (B'*A')' since computing B*A directly is untenable 5522 5523 n p p 5524 ( ) ( ) ( ) 5525 m ( A ) * n ( B ) = m ( C ) 5526 ( ) ( ) ( ) 5527 5528 */ 5529 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5530 { 5531 PetscErrorCode ierr; 5532 Mat At,Bt,Ct; 5533 5534 PetscFunctionBegin; 5535 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5536 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5537 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5538 ierr = MatDestroy(&At);CHKERRQ(ierr); 5539 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5540 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5541 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5542 PetscFunctionReturn(0); 5543 } 5544 5545 #undef __FUNCT__ 5546 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5547 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5548 { 5549 PetscErrorCode ierr; 5550 PetscInt m=A->rmap->n,n=B->cmap->n; 5551 Mat Cmat; 5552 5553 PetscFunctionBegin; 5554 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5555 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5556 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5557 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5558 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5559 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5560 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5561 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5562 5563 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5564 *C = Cmat; 5565 PetscFunctionReturn(0); 5566 } 5567 5568 /* ----------------------------------------------------------------*/ 5569 #undef __FUNCT__ 5570 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5571 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5572 { 5573 PetscErrorCode ierr; 5574 5575 PetscFunctionBegin; 5576 if (scall == MAT_INITIAL_MATRIX) { 5577 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5578 } 5579 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
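/* Note (added for clarity): for MAT_REUSE_MATRIX the symbolic phase is skipped and the numeric phase above overwrites the existing C in place. */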
5580 PetscFunctionReturn(0); 5581 } 5582 5583 EXTERN_C_BEGIN 5584 #if defined(PETSC_HAVE_MUMPS) 5585 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5586 #endif 5587 #if defined(PETSC_HAVE_PASTIX) 5588 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5589 #endif 5590 #if defined(PETSC_HAVE_SUPERLU_DIST) 5591 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5592 #endif 5593 #if defined(PETSC_HAVE_CLIQUE) 5594 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5595 #endif 5596 EXTERN_C_END 5597 5598 /*MC 5599 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5600 5601 Options Database Keys: 5602 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5603 5604 Level: beginner 5605 5606 .seealso: MatCreateAIJ() 5607 M*/ 5608 5609 EXTERN_C_BEGIN 5610 #undef __FUNCT__ 5611 #define __FUNCT__ "MatCreate_MPIAIJ" 5612 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5613 { 5614 Mat_MPIAIJ *b; 5615 PetscErrorCode ierr; 5616 PetscMPIInt size; 5617 5618 PetscFunctionBegin; 5619 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5620 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5621 B->data = (void*)b; 5622 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5623 B->assembled = PETSC_FALSE; 5624 B->insertmode = NOT_SET_VALUES; 5625 b->size = size; 5626 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5627 5628 /* build cache for off array entries formed */ 5629 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5630 b->donotstash = PETSC_FALSE; 5631 b->colmap = 0; 5632 b->garray = 0; 5633 b->roworiented = PETSC_TRUE; 5634 5635 /* stuff used for matrix vector multiply */ 5636 b->lvec = PETSC_NULL; 5637 b->Mvctx = PETSC_NULL; 5638 5639 /* stuff for MatGetRow() */ 5640 b->rowindices = 0; 5641 b->rowvalues = 0; 5642 b->getrowactive = PETSC_FALSE; 5643 5644 /* flexible pointer used in CUSP/CUSPARSE classes */ 5645 b->spptr = PETSC_NULL; 5646 5647 #if defined(PETSC_HAVE_MUMPS) 5648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5649 "MatGetFactor_aij_mumps", 5650 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5651 #endif 5652 #if defined(PETSC_HAVE_PASTIX) 5653 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5654 "MatGetFactor_mpiaij_pastix", 5655 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5656 #endif 5657 #if defined(PETSC_HAVE_SUPERLU_DIST) 5658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5659 "MatGetFactor_mpiaij_superlu_dist", 5660 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5661 #endif 5662 #if defined(PETSC_HAVE_CLIQUE) 5663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5664 "MatGetFactor_aij_clique", 5665 MatGetFactor_aij_clique);CHKERRQ(ierr); 5666 #endif 5667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5668 "MatStoreValues_MPIAIJ", 5669 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5671 "MatRetrieveValues_MPIAIJ", 5672 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5674 "MatGetDiagonalBlock_MPIAIJ", 5675 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5676 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5677 "MatIsTranspose_MPIAIJ", 5678 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5680 "MatMPIAIJSetPreallocation_MPIAIJ", 5681 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5683 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5684 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5686 "MatDiagonalScaleLocal_MPIAIJ", 5687 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5689 "MatConvert_MPIAIJ_MPIAIJPERM", 5690 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5692 "MatConvert_MPIAIJ_MPIAIJCRL", 5693 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5694 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5695 "MatConvert_MPIAIJ_MPISBAIJ", 5696 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5697 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5698 "MatMatMult_MPIDense_MPIAIJ", 5699 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5701 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5702 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5703 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5704 "MatMatMultNumeric_MPIDense_MPIAIJ", 5705 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5706 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5707 PetscFunctionReturn(0); 5708 } 5709 EXTERN_C_END 5710 5711 #undef __FUNCT__ 5712 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5713 /*@ 5714 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5715 and "off-diagonal" part of the matrix in CSR format. 5716 5717 Collective on MPI_Comm 5718 5719 Input Parameters: 5720 + comm - MPI communicator 5721 . m - number of local rows (Cannot be PETSC_DECIDE) 5722 . n - This value should be the same as the local size used in creating the 5723 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5724 it calculated if N is given). For square matrices n is almost always m. 5725 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5726 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5727 . i - row indices for "diagonal" portion of matrix 5728 . j - column indices 5729 . a - matrix values 5730 . oi - row indices for "off-diagonal" portion of matrix 5731 . oj - column indices 5732 - oa - matrix values 5733 5734 Output Parameter: 5735 . mat - the matrix 5736 5737 Level: advanced 5738 5739 Notes: 5740 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5741 must free the arrays once the matrix has been destroyed and not before. 5742 5743 The i and j indices are 0 based 5744 5745 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5746 5747 This sets local rows and cannot be used to set off-processor values. 
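   For example, the following sketch (with purely illustrative values; the arrays must remain valid until the matrix is destroyed) assembles rank 0's share of a 4 by 4 matrix distributed over two processes, where rows 0-1 and columns 0-1 are local to rank 0. Note that j holds LOCAL column indices for the "diagonal" block while oj holds GLOBAL column indices for the "off-diagonal" block, matching the two MatCreateSeqAIJWithArrays() calls in the implementation below.

.vb
   PetscInt    i[]  = {0,1,2};   /* diagonal block: one entry in each local row */
   PetscInt    j[]  = {0,1};     /* local column indices */
   PetscScalar a[]  = {1.0,2.0};
   PetscInt    oi[] = {0,1,1};   /* off-diagonal block: one entry, in row 0 */
   PetscInt    oj[] = {2};       /* global column indices */
   PetscScalar oa[] = {3.0};
   ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve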
5748 5749 You cannot later use MatSetValues() to change values in this matrix. 5750 5751 .keywords: matrix, aij, compressed row, sparse, parallel 5752 5753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5754 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5755 @*/ 5756 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5757 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5758 { 5759 PetscErrorCode ierr; 5760 Mat_MPIAIJ *maij; 5761 5762 PetscFunctionBegin; 5763 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5764 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5765 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5766 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5767 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5768 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5769 maij = (Mat_MPIAIJ*) (*mat)->data; 5770 maij->donotstash = PETSC_TRUE; 5771 (*mat)->preallocated = PETSC_TRUE; 5772 5773 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5774 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5775 5776 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5777 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5778 5779 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5781 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5782 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5783 5784 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5785 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5786 PetscFunctionReturn(0); 5787 } 5788 5789 /* 5790 Special version for direct calls from Fortran 5791 */ 5792 #include <petsc-private/fortranimpl.h> 5793 5794 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5795 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5796 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5797 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5798 #endif 5799 5800 /* Change these macros so they can be used in a void function */ 5801 #undef CHKERRQ 5802 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5803 #undef SETERRQ2 5804 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5805 #undef SETERRQ3 5806 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5807 #undef SETERRQ 5808 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5809 5810 EXTERN_C_BEGIN 5811 #undef __FUNCT__ 5812 #define __FUNCT__ "matsetvaluesmpiaij_" 5813 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5814 { 5815 Mat mat = *mmat; 5816 PetscInt m = *mm, n = *mn; 5817 InsertMode addv = *maddv; 5818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5819 PetscScalar value; 5820 PetscErrorCode ierr; 5821 5822 MatCheckPreallocated(mat,1); 5823 if (mat->insertmode == NOT_SET_VALUES) { 5824 mat->insertmode = addv; 5825 } 5826 #if defined(PETSC_USE_DEBUG) 5827 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5828 #endif 5829 { 5830 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5831 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5832 PetscBool roworiented = aij->roworiented; 5833 5834 /* Some Variables required in the macro */ 5835 Mat A = aij->A; 5836 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5837 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5838 MatScalar *aa = a->a; 5839 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5840 Mat B = aij->B; 5841 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5842 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5843 MatScalar *ba = b->a; 5844 5845 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5846 PetscInt nonew = a->nonew; 5847 MatScalar *ap1,*ap2; 5848 5849 PetscFunctionBegin; 5850 for (i=0; i<m; i++) { 5851 if (im[i] < 0) continue; 5852 #if defined(PETSC_USE_DEBUG) 5853 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5854 #endif 5855 if (im[i] >= rstart && im[i] < rend) { 5856 row = im[i] - rstart; 5857 lastcol1 = -1; 5858 rp1 = aj + ai[row]; 5859 ap1 = aa + ai[row]; 5860 rmax1 = aimax[row]; 5861 nrow1 = ailen[row]; 5862 low1 = 0; 5863 high1 = nrow1; 5864 lastcol2 = -1; 5865 rp2 = bj + bi[row]; 5866 ap2 = ba + bi[row]; 5867 rmax2 = bimax[row]; 5868 nrow2 = bilen[row]; 5869 low2 = 0; 5870 high2 = nrow2; 5871 5872 for (j=0; j<n; j++) { 5873 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5874 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5875 if (in[j] >= cstart && in[j] < cend) { 5876 col = in[j] - cstart; 5877 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5878 } else if (in[j] < 0) continue; 5879 #if defined(PETSC_USE_DEBUG) 5880 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5881 #endif 5882 else { 5883 if (mat->was_assembled) { 5884 if (!aij->colmap) { 5885 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5886 } 5887 #if defined (PETSC_USE_CTABLE) 5888 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5889 col--; 5890 #else 5891 col = aij->colmap[in[j]] - 1; 5892 #endif 5893 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5894 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5895 col = in[j]; 5896 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5897 B = aij->B; 5898 b = (Mat_SeqAIJ*)B->data; 5899 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5900 rp2 = bj + bi[row]; 5901 ap2 = ba + bi[row]; 5902 rmax2 = bimax[row]; 5903 nrow2 = bilen[row]; 5904 low2 = 0; 5905 high2 = nrow2; 5906 bm = aij->B->rmap->n; 5907 ba = b->a; 5908 } 5909 } else col = in[j]; 5910 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5911 } 5912 } 5913 } else { 5914 if (!aij->donotstash) { 5915 if (roworiented) { 5916 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5917 } else { 5918 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5919 } 5920 } 5921 } 5922 }} 5923 PetscFunctionReturnVoid(); 5924 } 5925 EXTERN_C_END 5926 5927
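/*
   A minimal usage sketch for the sequential-to-parallel summation path above,
   assuming every process holds a fully assembled SeqAIJ matrix seqmat with
   identical global dimensions (names and the reuse pattern are illustrative only):

     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the numerical values of seqmat, keeping its nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/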