#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
  automatically switches over to using inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt
i,rstart,nrows,*rows; 112 113 PetscFunctionBegin; 114 *zrows = PETSC_NULL; 115 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 116 ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr); 117 for (i=0; i<nrows; i++) rows[i] += rstart; 118 ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 125 { 126 PetscErrorCode ierr; 127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 128 PetscInt i,n,*garray = aij->garray; 129 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 130 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 131 PetscReal *work; 132 133 PetscFunctionBegin; 134 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 135 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 136 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 137 if (type == NORM_2) { 138 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 139 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 140 } 141 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 142 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 143 } 144 } else if (type == NORM_1) { 145 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 146 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 147 } 148 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 149 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 150 } 151 } else if (type == NORM_INFINITY) { 152 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 153 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 154 } 155 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 156 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 157 } 158 159 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 160 if (type == NORM_INFINITY) { 161 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 162 } else { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 164 } 165 ierr = PetscFree(work);CHKERRQ(ierr); 166 if (type == NORM_2) { 167 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatDistribute_MPIAIJ" 174 /* 175 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 176 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
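
    As a rough illustration of the calling pattern (a sketch inferred from the code
    below, not an official PETSc example; gA, A, mlocal and rank are hypothetical names):

       Mat gA = PETSC_NULL, A;
       if (!rank) {
         ... assemble the entire matrix gA as a MATSEQAIJ on process 0 only ...
       }
       ierr = MatDistribute_MPIAIJ(comm,gA,mlocal,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
       ... later, after changing only the numerical values of gA on process 0 ...
       ierr = MatDistribute_MPIAIJ(comm,gA,mlocal,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);

    Every process must call the routine; m is the number of rows it is to own locally,
    and gmat is only referenced on process 0.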
177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 if (!rank) { 204 bses[0] = gmat->rmap->bs; 205 bses[1] = gmat->cmap->bs; 206 } 207 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 209 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 210 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 211 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 212 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) { 215 rowners[i] += rowners[i-1]; 216 } 217 rstart = rowners[rank]; 218 rend = rowners[rank+1]; 219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 220 if (!rank) { 221 gmata = (Mat_SeqAIJ*) gmat->data; 222 /* send row lengths to all processors */ 223 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 224 for (i=1; i<size; i++) { 225 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 226 } 227 /* determine number diagonal and off-diagonal counts */ 228 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 230 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 264 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 265 jj = 0; 266 for (i=0; i<m; i++) { 267 for (j=0; j<dlens[i]; j++) { 268 if (gmataj[jj] < rstart) ld[i]++; 269 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 270 jj++; 271 } 272 } 273 /* receive numerical values */ 274 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 276 } 277 /* set preallocation */ 278 for (i=0; i<m; i++) { 279 dlens[i] -= olens[i]; 280 } 281 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 282 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 283 284 for (i=0; i<m; i++) { 285 dlens[i] += olens[i]; 286 } 287 cnt = 0; 288 for (i=0; i<m; i++) { 289 row = rstart + i; 290 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 291 cnt += dlens[i]; 292 } 293 if (rank) { 294 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 295 } 296 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 297 ierr = PetscFree(rowners);CHKERRQ(ierr); 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 *inmat = mat; 300 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 301 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 302 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 303 mat = *inmat; 304 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 305 if (!rank) { 306 /* send numerical values to other processes */ 307 gmata = (Mat_SeqAIJ*) gmat->data; 308 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 309 gmataa = gmata->a; 310 for (i=1; i<size; i++) { 311 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 312 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 313 } 314 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 315 } else { 316 /* receive numerical values from process 0*/ 317 nz = Ad->nz + Ao->nz; 318 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 319 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 320 } 321 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 322 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 323 ad = Ad->a; 324 ao = Ao->a; 325 if (mat->rmap->n) { 326 i = 0; 327 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 328 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 329 } 330 for (i=1; i<mat->rmap->n; i++) { 331 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 i--; 335 if (mat->rmap->n) { 336 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 337 } 338 if (rank) { 339 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 340 } 341 } 342 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 CHKMEMQ; 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal 
part of the local 351 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined (PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++){ 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 373 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 374 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 375 #endif 376 PetscFunctionReturn(0); 377 } 378 379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 380 { \ 381 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 382 lastcol1 = col;\ 383 while (high1-low1 > 5) { \ 384 t = (low1+high1)/2; \ 385 if (rp1[t] > col) high1 = t; \ 386 else low1 = t; \ 387 } \ 388 for (_i=low1; _i<high1; _i++) { \ 389 if (rp1[_i] > col) break; \ 390 if (rp1[_i] == col) { \ 391 if (addv == ADD_VALUES) ap1[_i] += value; \ 392 else ap1[_i] = value; \ 393 goto a_noinsert; \ 394 } \ 395 } \ 396 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 397 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 399 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 400 N = nrow1++ - 1; a->nz++; high1++; \ 401 /* shift up all the later entries in this row */ \ 402 for (ii=N; ii>=_i; ii--) { \ 403 rp1[ii+1] = rp1[ii]; \ 404 ap1[ii+1] = ap1[ii]; \ 405 } \ 406 rp1[_i] = col; \ 407 ap1[_i] = value; \ 408 a_noinsert: ; \ 409 ailen[row] = nrow1; \ 410 } 411 412 413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 414 { \ 415 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 416 lastcol2 = col;\ 417 while (high2-low2 > 5) { \ 418 t = (low2+high2)/2; \ 419 if (rp2[t] > col) high2 = t; \ 420 else low2 = t; \ 421 } \ 422 for (_i=low2; _i<high2; _i++) { \ 423 if (rp2[_i] > col) break; \ 424 if (rp2[_i] == col) { \ 425 if (addv == ADD_VALUES) ap2[_i] += value; \ 426 else ap2[_i] = value; \ 427 goto b_noinsert; \ 428 } \ 429 } \ 430 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 432 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 433 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 434 N = nrow2++ - 1; b->nz++; high2++; \ 435 /* shift up all the later entries in this row */ \ 436 for (ii=N; ii>=_i; ii--) { \ 437 rp2[ii+1] = rp2[ii]; \ 438 ap2[ii+1] = ap2[ii]; \ 439 } \ 440 rp2[_i] = col; \ 441 ap2[_i] = value; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 
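
/*
   The two macros above share the same search-and-insert pattern; written out as a
   free-standing sketch (hypothetical helper, not part of PETSc) it is roughly:

      PetscInt low = 0, high = nrow, t, i;
      while (high - low > 5) {                       coarse binary search on the sorted column list rp[]
        t = (low + high)/2;
        if (rp[t] > col) high = t; else low = t;
      }
      for (i = low; i < high; i++) {                 finish with a short linear scan
        if (rp[i] > col) break;                      passed the insertion point: col is new
        if (rp[i] == col) {                          entry already allocated: add or overwrite
          if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
          goto noinsert;
        }
      }
      otherwise grow the row if it is full (MatSeqXAIJReallocateAIJ), shift rp[i..]
      and ap[i..] up by one slot, and store (col,value) at position i.

   The macros additionally carry lastcol/low/high between invocations so that columns
   supplied in increasing order skip most of the search; the A variant updates the
   diagonal block and the B variant the off-diagonal block.
*/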
446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 if (v) PetscValidScalarPointer(v,6); 502 for (i=0; i<m; i++) { 503 if (im[i] < 0) continue; 504 #if defined(PETSC_USE_DEBUG) 505 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 506 #endif 507 if (im[i] >= rstart && im[i] < rend) { 508 row = im[i] - rstart; 509 lastcol1 = -1; 510 rp1 = aj + ai[row]; 511 ap1 = aa + ai[row]; 512 rmax1 = aimax[row]; 513 nrow1 = ailen[row]; 514 low1 = 0; 515 high1 = nrow1; 516 lastcol2 = -1; 517 rp2 = bj + bi[row]; 518 ap2 = ba + bi[row]; 519 rmax2 = bimax[row]; 520 nrow2 = bilen[row]; 521 low2 = 0; 522 high2 = nrow2; 523 524 for (j=0; j<n; j++) { 525 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend){ 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 
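          /* Note on the lookup that follows (behaviour inferred from the code paths
             below): with PETSC_USE_CTABLE, PetscTableFind() returns 0 for a global
             column that is not yet present in the off-diagonal part, so after the
             col-- a missing column shows up as col < 0; without the table, colmap[]
             stores local index + 1 and unset entries are 0, giving the same col < 0
             signal.  That is what triggers MatDisAssemble_MPIAIJ() below when new
             nonzero locations are allowed. */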
} 540 #if defined (PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend){ 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined (PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col --; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else { 620 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 621 } 622 } 623 PetscFunctionReturn(0); 624 } 625 626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat 
mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 706 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 707 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 708 709 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNCT__ 722 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 724 { 725 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 726 PetscErrorCode ierr; 727 728 PetscFunctionBegin; 729 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 730 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } 733 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatZeroRows_MPIAIJ" 736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 737 { 738 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 739 PetscErrorCode ierr; 740 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 741 PetscInt i,*owners = A->rmap->range; 742 PetscInt *nprocs,j,idx,nsends,row; 743 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 744 PetscInt *rvalues,count,base,slen,*source; 745 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 746 MPI_Comm comm = ((PetscObject)A)->comm; 747 MPI_Request *send_waits,*recv_waits; 748 MPI_Status recv_status,*send_status; 749 const PetscScalar *xx; 750 PetscScalar *bb; 751 #if defined(PETSC_DEBUG) 752 PetscBool found = PETSC_FALSE; 753 #endif 754 755 PetscFunctionBegin; 756 /* first count number of contributors to each processor */ 757 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 758 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 759 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 760 j = 0; 761 for (i=0; i<N; i++) { 762 if (lastidx > (idx = rows[i])) j = 0; 763 lastidx = idx; 764 for (; j<size; j++) { 765 if (idx >= owners[j] && idx < owners[j+1]) { 766 nprocs[2*j]++; 767 nprocs[2*j+1] = 1; 768 owner[i] = j; 769 #if defined(PETSC_DEBUG) 770 found = PETSC_TRUE; 771 #endif 772 break; 773 } 774 } 775 #if defined(PETSC_DEBUG) 776 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 777 found = PETSC_FALSE; 778 #endif 779 } 780 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 781 782 if (A->nooffproczerorows) { 783 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 784 nrecvs = nsends; 785 nmax = N; 786 } 
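  /* A sketch of the bookkeeping used here (hedged summary inferred from this file,
     not a specification of PetscMaxSum()): for each destination process j,
     nprocs[2*j] counts the rows this process will send to j and nprocs[2*j+1] is a
     0/1 flag marking that a message goes to j at all; PetscMaxSum() is then expected
     to return, on each process, the largest incoming message length (nmax) and the
     number of processes that will send to it (nrecvs), which size the receive
     buffers posted below. */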
else { 787 /* inform other processors of number of messages and max length*/ 788 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 789 } 790 791 /* post receives: */ 792 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 793 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 794 for (i=0; i<nrecvs; i++) { 795 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 796 } 797 798 /* do sends: 799 1) starts[i] gives the starting index in svalues for stuff going to 800 the ith processor 801 */ 802 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 804 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 805 starts[0] = 0; 806 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 807 for (i=0; i<N; i++) { 808 svalues[starts[owner[i]]++] = rows[i]; 809 } 810 811 starts[0] = 0; 812 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 813 count = 0; 814 for (i=0; i<size; i++) { 815 if (nprocs[2*i+1]) { 816 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 817 } 818 } 819 ierr = PetscFree(starts);CHKERRQ(ierr); 820 821 base = owners[rank]; 822 823 /* wait on receives */ 824 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 825 count = nrecvs; slen = 0; 826 while (count) { 827 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 828 /* unpack receives into our local space */ 829 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 830 source[imdex] = recv_status.MPI_SOURCE; 831 lens[imdex] = n; 832 slen += n; 833 count--; 834 } 835 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 836 837 /* move the data into the send scatter */ 838 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 839 count = 0; 840 for (i=0; i<nrecvs; i++) { 841 values = rvalues + i*nmax; 842 for (j=0; j<lens[i]; j++) { 843 lrows[count++] = values[j] - base; 844 } 845 } 846 ierr = PetscFree(rvalues);CHKERRQ(ierr); 847 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 848 ierr = PetscFree(owner);CHKERRQ(ierr); 849 ierr = PetscFree(nprocs);CHKERRQ(ierr); 850 851 /* fix right hand side if needed */ 852 if (x && b) { 853 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 854 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 855 for (i=0; i<slen; i++) { 856 bb[lrows[i]] = diag*xx[lrows[i]]; 857 } 858 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 859 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 860 } 861 /* 862 Zero the required rows. If the "diagonal block" of the matrix 863 is square and the user wishes to set the diagonal we use separate 864 code so that MatSetValues() is not called for each diagonal allocating 865 new memory, thus calling lots of mallocs and slowing things down. 
866 867 */ 868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 869 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 871 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 872 } else if (diag != 0.0) { 873 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 874 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 875 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 877 } 878 for (i = 0; i < slen; i++) { 879 row = lrows[i] + rstart; 880 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 881 } 882 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 883 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 884 } else { 885 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 886 } 887 ierr = PetscFree(lrows);CHKERRQ(ierr); 888 889 /* wait on sends */ 890 if (nsends) { 891 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 892 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 893 ierr = PetscFree(send_status);CHKERRQ(ierr); 894 } 895 ierr = PetscFree(send_waits);CHKERRQ(ierr); 896 ierr = PetscFree(svalues);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 903 { 904 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 907 PetscInt i,*owners = A->rmap->range; 908 PetscInt *nprocs,j,idx,nsends; 909 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 910 PetscInt *rvalues,count,base,slen,*source; 911 PetscInt *lens,*lrows,*values,m; 912 MPI_Comm comm = ((PetscObject)A)->comm; 913 MPI_Request *send_waits,*recv_waits; 914 MPI_Status recv_status,*send_status; 915 const PetscScalar *xx; 916 PetscScalar *bb,*mask; 917 Vec xmask,lmask; 918 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 919 const PetscInt *aj, *ii,*ridx; 920 PetscScalar *aa; 921 #if defined(PETSC_DEBUG) 922 PetscBool found = PETSC_FALSE; 923 #endif 924 925 PetscFunctionBegin; 926 /* first count number of contributors to each processor */ 927 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 928 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 929 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 930 j = 0; 931 for (i=0; i<N; i++) { 932 if (lastidx > (idx = rows[i])) j = 0; 933 lastidx = idx; 934 for (; j<size; j++) { 935 if (idx >= owners[j] && idx < owners[j+1]) { 936 nprocs[2*j]++; 937 nprocs[2*j+1] = 1; 938 owner[i] = j; 939 #if defined(PETSC_DEBUG) 940 found = PETSC_TRUE; 941 #endif 942 break; 943 } 944 } 945 #if defined(PETSC_DEBUG) 946 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 947 found = PETSC_FALSE; 948 #endif 949 } 950 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 951 952 /* inform other processors of number of messages and max length*/ 953 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 954 955 /* post receives: */ 956 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 957 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 958 for (i=0; i<nrecvs; i++) { 959 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 960 } 961 962 /* do sends: 963 1) starts[i] gives the starting index in svalues for stuff going to 964 the ith processor 965 */ 966 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 967 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 968 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 969 starts[0] = 0; 970 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 971 for (i=0; i<N; i++) { 972 svalues[starts[owner[i]]++] = rows[i]; 973 } 974 975 starts[0] = 0; 976 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 977 count = 0; 978 for (i=0; i<size; i++) { 979 if (nprocs[2*i+1]) { 980 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 981 } 982 } 983 ierr = PetscFree(starts);CHKERRQ(ierr); 984 985 base = owners[rank]; 986 987 /* wait on receives */ 988 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 989 count = nrecvs; slen = 0; 990 while (count) { 991 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 992 /* unpack receives into our local space */ 993 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 994 source[imdex] = recv_status.MPI_SOURCE; 995 lens[imdex] = n; 996 slen += n; 997 count--; 998 } 999 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1000 1001 /* move the data into the send scatter */ 1002 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1003 count = 0; 1004 for (i=0; i<nrecvs; i++) { 1005 values = rvalues + i*nmax; 1006 for (j=0; j<lens[i]; j++) { 1007 lrows[count++] = values[j] - base; 1008 } 1009 } 1010 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1011 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1012 ierr = PetscFree(owner);CHKERRQ(ierr); 1013 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1014 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1015 1016 /* zero diagonal part of matrix */ 1017 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1018 1019 /* handle off diagonal part of matrix */ 1020 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1021 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1022 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1023 for (i=0; i<slen; i++) { 1024 bb[lrows[i]] = 1; 1025 } 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use){ 1046 m = 
aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++){ 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 EXTERN_C_BEGIN 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1179 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1180 { 1181 MPI_Comm comm; 1182 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1183 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1184 IS Me,Notme; 1185 PetscErrorCode ierr; 1186 PetscInt M,N,first,last,*notme,i; 1187 PetscMPIInt size; 1188 1189 PetscFunctionBegin; 1190 1191 /* Easy test: symmetric diagonal block */ 1192 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1193 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1194 if (!*f) PetscFunctionReturn(0); 1195 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1196 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1197 if (size == 1) PetscFunctionReturn(0); 1198 1199 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
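     In outline (a paraphrase of the code below, with Me = the locally owned rows
     and Notme = all other global indices):

        Aoff = Amat(Me,Notme)      gathered with MatGetSubMatrices()
        Boff = Bmat(Notme,Me)
        MatIsTranspose(Aoff,Boff,tol,f)

     so the off-process couplings of Amat are compared against the transposed
     couplings of Bmat one process at a time.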
*/ 1200 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1201 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1202 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1203 for (i=0; i<first; i++) notme[i] = i; 1204 for (i=last; i<M; i++) notme[i-last+first] = i; 1205 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1206 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1207 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1208 Aoff = Aoffs[0]; 1209 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1210 Boff = Boffs[0]; 1211 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1212 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1213 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1214 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1215 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1216 ierr = PetscFree(notme);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 EXTERN_C_END 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1224 { 1225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* do nondiagonal part */ 1230 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1231 /* send it on its way */ 1232 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 /* do local part */ 1234 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1235 /* receive remote parts */ 1236 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 /* 1241 This only works correctly for square matrices where the subblock A->A is the 1242 diagonal block 1243 */ 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1247 { 1248 PetscErrorCode ierr; 1249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1250 1251 PetscFunctionBegin; 1252 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1253 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1254 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatScale_MPIAIJ" 1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1261 { 1262 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1267 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatDestroy_MPIAIJ" 1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 #if defined(PETSC_USE_LOG) 1280 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1281 #endif 1282 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1283 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1284 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1285 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1286 #if defined (PETSC_USE_CTABLE) 1287 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 
1288 #else 1289 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1290 #endif 1291 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1292 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1293 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1294 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1295 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1296 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1316 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1319 int fd; 1320 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1321 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1322 PetscScalar *column_values; 1323 PetscInt message_count,flowcontrolcount; 1324 FILE *file; 1325 1326 PetscFunctionBegin; 1327 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1328 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1329 nz = A->nz + B->nz; 1330 if (!rank) { 1331 header[0] = MAT_FILE_CLASSID; 1332 header[1] = mat->rmap->N; 1333 header[2] = mat->cmap->N; 1334 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1335 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1336 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1337 /* get largest number of rows any processor has */ 1338 rlen = mat->rmap->n; 1339 range = mat->rmap->range; 1340 for (i=1; i<size; i++) { 1341 rlen = PetscMax(rlen,range[i+1] - range[i]); 1342 } 1343 } else { 1344 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 rlen = mat->rmap->n; 1346 } 1347 1348 /* load up the local row counts */ 1349 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1350 for (i=0; i<mat->rmap->n; i++) { 1351 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1352 } 1353 1354 /* store the row lengths to the file */ 1355 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1356 if (!rank) { 1357 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1358 for (i=1; i<size; i++) { 1359 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr =
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1431 if (!rank) { 1432 MPI_Status status; 1433 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1434 for (i=1; i<size; i++) { 1435 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1436 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1437 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1438 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1439 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1440 } 1441 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1442 } else { 1443 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1444 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1445 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1446 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1447 } 1448 ierr = PetscFree(column_values);CHKERRQ(ierr); 1449 1450 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1451 if (file) { 1452 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1453 } 1454 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1461 { 1462 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1463 PetscErrorCode ierr; 1464 PetscMPIInt rank = aij->rank,size = aij->size; 1465 PetscBool isdraw,iascii,isbinary; 1466 PetscViewer sviewer; 1467 PetscViewerFormat format; 1468 1469 PetscFunctionBegin; 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1472 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1473 if (iascii) { 1474 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1475 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1476 MatInfo info; 1477 PetscBool inodes; 1478 1479 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1480 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1481 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1482 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1483 if (!inodes) { 1484 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1485 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1486 } else { 1487 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1488 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1489 } 1490 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1491 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1492 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1493 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D 
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
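/*
     Usage sketch (not part of this routine): in parallel only the "local" sweeps
     implemented here are available; a true parallel SOR is refused at the end of
     this function.  A typical way to exercise this code is through PCSOR, assuming
     a KSP ksp whose operator is an assembled MPIAIJ matrix:

       PC pc;
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
       ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

     or, equivalently, -pc_type sor -pc_sor_local_symmetric on the command line.
*/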
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 Mat aA,aB,Aperm; 1725 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1726 PetscScalar *aa,*ba; 1727 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1728 PetscSF rowsf,sf; 1729 IS parcolp = PETSC_NULL; 1730 PetscBool done; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1735 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1736 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1737 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1738 1739 /* Invert row permutation to find out where my rows should go */ 1740 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1741 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1742 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1743 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1744 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1745 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1746 1747 /* Invert column permutation to find out where my columns should go */ 1748 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1749 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1750 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1751 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1752 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1753 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1754 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1755 1756 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1757 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1758 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1759 1760 /* Find out where my gcols should go */ 1761 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1762 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1763 ierr = 
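/*
     Usage sketch (minimal, with hypothetical index sets rowp and colp): this routine
     is reached through the generic MatPermute() interface.  rowp and colp are
     parallel index sets describing the row and column permutations (see the
     MatPermute() man page for the exact convention); their local sizes must match
     the row and column layout of A.

       IS  rowp,colp;
       Mat Aperm;
       ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
       ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/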
PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1764 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1765 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1766 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1767 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1768 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1769 1770 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1771 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1772 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1774 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1775 for (i=0; i<m; i++) { 1776 PetscInt row = rdest[i],rowner; 1777 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1778 for (j=ai[i]; j<ai[i+1]; j++) { 1779 PetscInt cowner,col = cdest[aj[j]]; 1780 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1781 if (rowner == cowner) dnnz[i]++; 1782 else onnz[i]++; 1783 } 1784 for (j=bi[i]; j<bi[i+1]; j++) { 1785 PetscInt cowner,col = gcdest[bj[j]]; 1786 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1787 if (rowner == cowner) dnnz[i]++; 1788 else onnz[i]++; 1789 } 1790 } 1791 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1792 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1793 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1794 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1795 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1796 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1797 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1798 1799 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1800 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1801 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1802 for (i=0; i<m; i++) { 1803 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1804 PetscInt rowlen; 1805 rowlen = ai[i+1] - ai[i]; 1806 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1807 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1808 rowlen = bi[i+1] - bi[i]; 1809 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1810 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1811 } 1812 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1813 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1814 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1815 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1816 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1817 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1818 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1819 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1820 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1821 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1822 *B = Aperm; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1828 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1829 { 1830 Mat_MPIAIJ 
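/*
     Usage sketch (minimal, for a matrix A): the routine below adds the statistics of
     the diagonal block and the off-diagonal block; a caller usually asks for the
     local values or a global reduction.  MatInfo fields are PetscLogDouble, hence
     the casts when printing:

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  allocated %g  mallocs %g\n",
                          (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
*/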
*mat = (Mat_MPIAIJ*)matin->data; 1831 Mat A = mat->A,B = mat->B; 1832 PetscErrorCode ierr; 1833 PetscReal isend[5],irecv[5]; 1834 1835 PetscFunctionBegin; 1836 info->block_size = 1.0; 1837 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1838 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1839 isend[3] = info->memory; isend[4] = info->mallocs; 1840 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1841 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1842 isend[3] += info->memory; isend[4] += info->mallocs; 1843 if (flag == MAT_LOCAL) { 1844 info->nz_used = isend[0]; 1845 info->nz_allocated = isend[1]; 1846 info->nz_unneeded = isend[2]; 1847 info->memory = isend[3]; 1848 info->mallocs = isend[4]; 1849 } else if (flag == MAT_GLOBAL_MAX) { 1850 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1851 info->nz_used = irecv[0]; 1852 info->nz_allocated = irecv[1]; 1853 info->nz_unneeded = irecv[2]; 1854 info->memory = irecv[3]; 1855 info->mallocs = irecv[4]; 1856 } else if (flag == MAT_GLOBAL_SUM) { 1857 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1858 info->nz_used = irecv[0]; 1859 info->nz_allocated = irecv[1]; 1860 info->nz_unneeded = irecv[2]; 1861 info->memory = irecv[3]; 1862 info->mallocs = irecv[4]; 1863 } 1864 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1865 info->fill_ratio_needed = 0; 1866 info->factor_mallocs = 0; 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatSetOption_MPIAIJ" 1872 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1873 { 1874 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1875 PetscErrorCode ierr; 1876 1877 PetscFunctionBegin; 1878 switch (op) { 1879 case MAT_NEW_NONZERO_LOCATIONS: 1880 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1881 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1882 case MAT_KEEP_NONZERO_PATTERN: 1883 case MAT_NEW_NONZERO_LOCATION_ERR: 1884 case MAT_USE_INODES: 1885 case MAT_IGNORE_ZERO_ENTRIES: 1886 MatCheckPreallocated(A,1); 1887 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1888 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1889 break; 1890 case MAT_ROW_ORIENTED: 1891 a->roworiented = flg; 1892 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1893 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1894 break; 1895 case MAT_NEW_DIAGONALS: 1896 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1897 break; 1898 case MAT_IGNORE_OFF_PROC_ENTRIES: 1899 a->donotstash = flg; 1900 break; 1901 case MAT_SPD: 1902 A->spd_set = PETSC_TRUE; 1903 A->spd = flg; 1904 if (flg) { 1905 A->symmetric = PETSC_TRUE; 1906 A->structurally_symmetric = PETSC_TRUE; 1907 A->symmetric_set = PETSC_TRUE; 1908 A->structurally_symmetric_set = PETSC_TRUE; 1909 } 1910 break; 1911 case MAT_SYMMETRIC: 1912 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1913 break; 1914 case MAT_STRUCTURALLY_SYMMETRIC: 1915 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1916 break; 1917 case MAT_HERMITIAN: 1918 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1919 break; 1920 case MAT_SYMMETRY_ETERNAL: 1921 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1922 break; 1923 default: 1924 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatGetRow_MPIAIJ" 1931 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt 
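/*
     Usage sketch (minimal, assuming an assembled parallel matrix A): only locally
     owned rows may be queried, and each MatGetRow() must be paired with
     MatRestoreRow() before the next row is requested (enforced by the getrowactive
     flag below).

       PetscInt          rstart,rend,row,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         (use ncols, cols, vals here)
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/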
**idx,PetscScalar **v) 1932 { 1933 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1934 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1935 PetscErrorCode ierr; 1936 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1937 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1938 PetscInt *cmap,*idx_p; 1939 1940 PetscFunctionBegin; 1941 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1942 mat->getrowactive = PETSC_TRUE; 1943 1944 if (!mat->rowvalues && (idx || v)) { 1945 /* 1946 allocate enough space to hold information from the longest row. 1947 */ 1948 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1949 PetscInt max = 1,tmp; 1950 for (i=0; i<matin->rmap->n; i++) { 1951 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1952 if (max < tmp) { max = tmp; } 1953 } 1954 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1955 } 1956 1957 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1958 lrow = row - rstart; 1959 1960 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1961 if (!v) {pvA = 0; pvB = 0;} 1962 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1963 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1964 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1965 nztot = nzA + nzB; 1966 1967 cmap = mat->garray; 1968 if (v || idx) { 1969 if (nztot) { 1970 /* Sort by increasing column numbers, assuming A and B already sorted */ 1971 PetscInt imark = -1; 1972 if (v) { 1973 *v = v_p = mat->rowvalues; 1974 for (i=0; i<nzB; i++) { 1975 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1976 else break; 1977 } 1978 imark = i; 1979 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1980 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1981 } 1982 if (idx) { 1983 *idx = idx_p = mat->rowindices; 1984 if (imark > -1) { 1985 for (i=0; i<imark; i++) { 1986 idx_p[i] = cmap[cworkB[i]]; 1987 } 1988 } else { 1989 for (i=0; i<nzB; i++) { 1990 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1991 else break; 1992 } 1993 imark = i; 1994 } 1995 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1996 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1997 } 1998 } else { 1999 if (idx) *idx = 0; 2000 if (v) *v = 0; 2001 } 2002 } 2003 *nz = nztot; 2004 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2005 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2011 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 2015 PetscFunctionBegin; 2016 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2017 aij->getrowactive = PETSC_FALSE; 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatNorm_MPIAIJ" 2023 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2024 { 2025 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2026 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2027 PetscErrorCode ierr; 2028 PetscInt i,j,cstart = mat->cmap->rstart; 2029 PetscReal sum = 0.0; 2030 MatScalar *v; 2031 2032 PetscFunctionBegin; 2033 if (aij->size == 1) { 2034 ierr 
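/*
     Usage sketch (minimal): the parallel branch below supports NORM_FROBENIUS,
     NORM_1 (largest column sum) and NORM_INFINITY (largest row sum); the true
     2-norm is refused for parallel MPIAIJ matrices.

       PetscReal nrm;
       ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
       ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/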
= MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2035 } else { 2036 if (type == NORM_FROBENIUS) { 2037 v = amat->a; 2038 for (i=0; i<amat->nz; i++) { 2039 #if defined(PETSC_USE_COMPLEX) 2040 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2041 #else 2042 sum += (*v)*(*v); v++; 2043 #endif 2044 } 2045 v = bmat->a; 2046 for (i=0; i<bmat->nz; i++) { 2047 #if defined(PETSC_USE_COMPLEX) 2048 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2049 #else 2050 sum += (*v)*(*v); v++; 2051 #endif 2052 } 2053 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2054 *norm = PetscSqrtReal(*norm); 2055 } else if (type == NORM_1) { /* max column norm */ 2056 PetscReal *tmp,*tmp2; 2057 PetscInt *jj,*garray = aij->garray; 2058 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2059 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2060 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2061 *norm = 0.0; 2062 v = amat->a; jj = amat->j; 2063 for (j=0; j<amat->nz; j++) { 2064 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2065 } 2066 v = bmat->a; jj = bmat->j; 2067 for (j=0; j<bmat->nz; j++) { 2068 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2069 } 2070 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2071 for (j=0; j<mat->cmap->N; j++) { 2072 if (tmp2[j] > *norm) *norm = tmp2[j]; 2073 } 2074 ierr = PetscFree(tmp);CHKERRQ(ierr); 2075 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2076 } else if (type == NORM_INFINITY) { /* max row norm */ 2077 PetscReal ntemp = 0.0; 2078 for (j=0; j<aij->A->rmap->n; j++) { 2079 v = amat->a + amat->i[j]; 2080 sum = 0.0; 2081 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2082 sum += PetscAbsScalar(*v); v++; 2083 } 2084 v = bmat->a + bmat->i[j]; 2085 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2086 sum += PetscAbsScalar(*v); v++; 2087 } 2088 if (sum > ntemp) ntemp = sum; 2089 } 2090 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2091 } else { 2092 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2093 } 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatTranspose_MPIAIJ" 2100 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2101 { 2102 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2103 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2104 PetscErrorCode ierr; 2105 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2106 PetscInt cstart=A->cmap->rstart,ncol; 2107 Mat B; 2108 MatScalar *array; 2109 2110 PetscFunctionBegin; 2111 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2112 2113 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2114 ai = Aloc->i; aj = Aloc->j; 2115 bi = Bloc->i; bj = Bloc->j; 2116 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2117 PetscInt *d_nnz,*g_nnz,*o_nnz; 2118 PetscSFNode *oloc; 2119 PETSC_UNUSED PetscSF sf; 2120 2121 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2122 /* compute d_nnz for preallocation */ 2123 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2124 for (i=0; i<ai[ma]; i++) { 2125 d_nnz[aj[i]] ++; 2126 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2127 } 2128 /* compute local 
off-diagonal contributions */ 2129 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2130 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2131 /* map those to global */ 2132 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2133 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2134 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2135 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2136 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2137 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2138 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2139 2140 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2141 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2142 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2143 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2145 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2146 } else { 2147 B = *matout; 2148 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2149 for (i=0; i<ai[ma]; i++){ 2150 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2151 } 2152 } 2153 2154 /* copy over the A part */ 2155 array = Aloc->a; 2156 row = A->rmap->rstart; 2157 for (i=0; i<ma; i++) { 2158 ncol = ai[i+1]-ai[i]; 2159 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2160 row++; array += ncol; aj += ncol; 2161 } 2162 aj = Aloc->j; 2163 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2164 2165 /* copy over the B part */ 2166 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2167 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2168 array = Bloc->a; 2169 row = A->rmap->rstart; 2170 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2171 cols_tmp = cols; 2172 for (i=0; i<mb; i++) { 2173 ncol = bi[i+1]-bi[i]; 2174 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2175 row++; array += ncol; cols_tmp += ncol; 2176 } 2177 ierr = PetscFree(cols);CHKERRQ(ierr); 2178 2179 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2180 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2182 *matout = B; 2183 } else { 2184 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2185 } 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2191 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2192 { 2193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2194 Mat a = aij->A,b = aij->B; 2195 PetscErrorCode ierr; 2196 PetscInt s1,s2,s3; 2197 2198 PetscFunctionBegin; 2199 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2200 if (rr) { 2201 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2202 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2203 /* Overlap communication with computation. 
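        The scatter of rr into aij->lvec is started here and only finished after the
        diagonal block has been scaled, so the off-diagonal block is then right-scaled
        with up-to-date ghost values.  From user code this is simply (minimal sketch;
        MatGetVecs() yields conforming right/left vectors):

          Vec l,r;
          ierr = MatGetVecs(mat,&r,&l);CHKERRQ(ierr);
          (fill l and r with the desired row and column scalings)
          ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);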
*/ 2204 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2205 } 2206 if (ll) { 2207 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2208 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2209 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2210 } 2211 /* scale the diagonal block */ 2212 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2213 2214 if (rr) { 2215 /* Do a scatter end and then right scale the off-diagonal block */ 2216 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2217 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2218 } 2219 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2225 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2226 { 2227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2228 PetscErrorCode ierr; 2229 2230 PetscFunctionBegin; 2231 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2232 PetscFunctionReturn(0); 2233 } 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatEqual_MPIAIJ" 2237 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2238 { 2239 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2240 Mat a,b,c,d; 2241 PetscBool flg; 2242 PetscErrorCode ierr; 2243 2244 PetscFunctionBegin; 2245 a = matA->A; b = matA->B; 2246 c = matB->A; d = matB->B; 2247 2248 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2249 if (flg) { 2250 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2251 } 2252 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatCopy_MPIAIJ" 2258 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2259 { 2260 PetscErrorCode ierr; 2261 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2262 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2263 2264 PetscFunctionBegin; 2265 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2266 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2267 /* because of the column compression in the off-processor part of the matrix a->B, 2268 the number of columns in a->B and b->B may be different, hence we cannot call 2269 the MatCopy() directly on the two parts. 
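        (At the user level the distinction only affects speed: passing
        SAME_NONZERO_PATTERN is only safe when B was created with the same pattern
        as A, typically

          ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
          ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

        otherwise DIFFERENT_NONZERO_PATTERN should be passed and this branch is
        taken.)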
If need be, we can provide a more 2270 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2271 then copying the submatrices */ 2272 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2273 } else { 2274 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2275 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 2280 #undef __FUNCT__ 2281 #define __FUNCT__ "MatSetUp_MPIAIJ" 2282 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2283 { 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2293 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2294 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2295 { 2296 PetscInt i,m=Y->rmap->N; 2297 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2298 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2299 const PetscInt *xi = x->i,*yi = y->i; 2300 2301 PetscFunctionBegin; 2302 /* Set the number of nonzeros in the new matrix */ 2303 for (i=0; i<m; i++) { 2304 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2305 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2306 nnz[i] = 0; 2307 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2308 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2309 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2310 nnz[i]++; 2311 } 2312 for (; k<nzy; k++) nnz[i]++; 2313 } 2314 PetscFunctionReturn(0); 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatAXPY_MPIAIJ" 2319 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2320 { 2321 PetscErrorCode ierr; 2322 PetscInt i; 2323 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2324 PetscBLASInt bnz,one=1; 2325 Mat_SeqAIJ *x,*y; 2326 2327 PetscFunctionBegin; 2328 if (str == SAME_NONZERO_PATTERN) { 2329 PetscScalar alpha = a; 2330 x = (Mat_SeqAIJ *)xx->A->data; 2331 y = (Mat_SeqAIJ *)yy->A->data; 2332 bnz = PetscBLASIntCast(x->nz); 2333 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2334 x = (Mat_SeqAIJ *)xx->B->data; 2335 y = (Mat_SeqAIJ *)yy->B->data; 2336 bnz = PetscBLASIntCast(x->nz); 2337 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2338 } else if (str == SUBSET_NONZERO_PATTERN) { 2339 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2340 2341 x = (Mat_SeqAIJ *)xx->B->data; 2342 y = (Mat_SeqAIJ *)yy->B->data; 2343 if (y->xtoy && y->XtoY != xx->B) { 2344 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2345 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2346 } 2347 if (!y->xtoy) { /* get xtoy */ 2348 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2349 y->XtoY = xx->B; 2350 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2351 } 2352 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2353 } else { 2354 Mat B; 2355 PetscInt *nnz_d,*nnz_o; 2356 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2357 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2358 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2359 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2360 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2361 ierr = 
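/*
     Usage sketch (minimal): Y = Y + a*X through the generic MatAXPY() interface;
     the MatStructure argument selects one of the three branches of this routine.

       ierr = MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

     SAME_NONZERO_PATTERN does a plain BLAS axpy on the stored values,
     SUBSET_NONZERO_PATTERN builds (and caches) an x-to-y index map, and
     DIFFERENT_NONZERO_PATTERN, the branch this comment sits in, preallocates a
     fresh matrix, assembles the sum into it and then replaces Y's header.
*/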
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2362 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2363 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2364 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2365 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2366 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2367 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2368 ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2369 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2370 }
2371 PetscFunctionReturn(0);
2372 }
2373
2374 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2375
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatConjugate_MPIAIJ"
2378 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2379 {
2380 #if defined(PETSC_USE_COMPLEX)
2381 PetscErrorCode ierr;
2382 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2383
2384 PetscFunctionBegin;
2385 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2386 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2387 #else
2388 PetscFunctionBegin;
2389 #endif
2390 PetscFunctionReturn(0);
2391 }
2392
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatRealPart_MPIAIJ"
2395 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2396 {
2397 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2398 PetscErrorCode ierr;
2399
2400 PetscFunctionBegin;
2401 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2402 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2403 PetscFunctionReturn(0);
2404 }
2405
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2408 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2409 {
2410 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2411 PetscErrorCode ierr;
2412
2413 PetscFunctionBegin;
2414 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2415 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2416 PetscFunctionReturn(0);
2417 }
2418
2419 #ifdef PETSC_HAVE_PBGL
2420
2421 #include <boost/parallel/mpi/bsp_process_group.hpp>
2422 #include <boost/graph/distributed/ilu_default_graph.hpp>
2423 #include <boost/graph/distributed/ilu_0_block.hpp>
2424 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2425 #include <boost/graph/distributed/petsc/interface.hpp>
2426 #include <boost/multi_array.hpp>
2427 #include <boost/parallel/distributed_property_map.hpp>
2428
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2431 /*
2432 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2433 */
2434 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2435 {
2436 namespace petsc = boost::distributed::petsc;
2437
2438 namespace graph_dist = boost::graph::distributed;
2439 using boost::graph::distributed::ilu_default::process_group_type;
2440 using boost::graph::ilu_permuted;
2441
2442 PetscBool row_identity, col_identity;
2443 PetscContainer c;
2444 PetscInt m, n, M, N;
2445 PetscErrorCode ierr;
2446
2447 PetscFunctionBegin;
2448 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2449 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2450 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2451 if (!row_identity || !col_identity) {
2452 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2453 }
2454
2455 process_group_type pg;
2456 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2457 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2458 lgraph_type& level_graph = *lgraph_p; 2459 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2460 2461 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2462 ilu_permuted(level_graph); 2463 2464 /* put together the new matrix */ 2465 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2466 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2467 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2468 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2469 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2470 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2471 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2472 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2473 2474 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2475 ierr = PetscContainerSetPointer(c, lgraph_p); 2476 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2477 ierr = PetscContainerDestroy(&c); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2483 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2484 { 2485 PetscFunctionBegin; 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "MatSolve_MPIAIJ" 2491 /* 2492 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2493 */ 2494 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2495 { 2496 namespace graph_dist = boost::graph::distributed; 2497 2498 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2499 lgraph_type* lgraph_p; 2500 PetscContainer c; 2501 PetscErrorCode ierr; 2502 2503 PetscFunctionBegin; 2504 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2505 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2506 ierr = VecCopy(b, x);CHKERRQ(ierr); 2507 2508 PetscScalar* array_x; 2509 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2510 PetscInt sx; 2511 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2512 2513 PetscScalar* array_b; 2514 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2515 PetscInt sb; 2516 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2517 2518 lgraph_type& level_graph = *lgraph_p; 2519 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2520 2521 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2522 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2523 ref_x(array_x, boost::extents[num_vertices(graph)]); 2524 2525 typedef boost::iterator_property_map<array_ref_type::iterator, 2526 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2527 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2528 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2529 2530 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2531 2532 PetscFunctionReturn(0); 2533 } 2534 #endif 2535 2536 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2537 PetscInt nzlocal,nsends,nrecvs; 2538 PetscMPIInt *send_rank,*recv_rank; 2539 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2540 PetscScalar *sbuf_a,**rbuf_a; 2541 PetscErrorCode (*Destroy)(Mat); 2542 } Mat_Redundant; 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2546 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2547 { 2548 PetscErrorCode ierr; 2549 Mat_Redundant 
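/*
     The Mat_Redundant struct above is attached to the redundant matrix through a
     PetscContainer, so that it can be recovered on reuse and released when the
     matrix is destroyed.  The generic pattern, as a minimal sketch with hypothetical
     names (MyCtx, MyCtxDestroy, newmat), is:

       PetscContainer container;
       MyCtx          *ctx;
       ierr = PetscMalloc(sizeof(MyCtx),&ctx);CHKERRQ(ierr);
       ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
       ierr = PetscContainerSetPointer(container,ctx);CHKERRQ(ierr);
       ierr = PetscContainerSetUserDestroy(container,MyCtxDestroy);CHKERRQ(ierr);
       ierr = PetscObjectCompose((PetscObject)newmat,"MyCtx",(PetscObject)container);CHKERRQ(ierr);
       ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

     where MyCtxDestroy has the signature PetscErrorCode MyCtxDestroy(void*).
     PetscObjectQuery() plus PetscContainerGetPointer() recover ctx later, exactly as
     MatDestroy_MatRedundant() below does for Mat_Redundant.
*/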
*redund=(Mat_Redundant*)ptr; 2550 PetscInt i; 2551 2552 PetscFunctionBegin; 2553 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2554 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2555 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2556 for (i=0; i<redund->nrecvs; i++){ 2557 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2558 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2559 } 2560 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2561 ierr = PetscFree(redund);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatDestroy_MatRedundant" 2567 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2568 { 2569 PetscErrorCode ierr; 2570 PetscContainer container; 2571 Mat_Redundant *redund=PETSC_NULL; 2572 2573 PetscFunctionBegin; 2574 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2575 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2576 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2577 A->ops->destroy = redund->Destroy; 2578 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2579 if (A->ops->destroy) { 2580 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2581 } 2582 PetscFunctionReturn(0); 2583 } 2584 2585 #undef __FUNCT__ 2586 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2587 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2588 { 2589 PetscMPIInt rank,size; 2590 MPI_Comm comm=((PetscObject)mat)->comm; 2591 PetscErrorCode ierr; 2592 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2593 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2594 PetscInt *rowrange=mat->rmap->range; 2595 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2596 Mat A=aij->A,B=aij->B,C=*matredundant; 2597 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2598 PetscScalar *sbuf_a; 2599 PetscInt nzlocal=a->nz+b->nz; 2600 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2601 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2602 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2603 MatScalar *aworkA,*aworkB; 2604 PetscScalar *vals; 2605 PetscMPIInt tag1,tag2,tag3,imdex; 2606 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2607 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2608 MPI_Status recv_status,*send_status; 2609 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2610 PetscInt **rbuf_j=PETSC_NULL; 2611 PetscScalar **rbuf_a=PETSC_NULL; 2612 Mat_Redundant *redund=PETSC_NULL; 2613 PetscContainer container; 2614 2615 PetscFunctionBegin; 2616 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2617 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2618 2619 if (reuse == MAT_REUSE_MATRIX) { 2620 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2621 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2622 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2623 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2624 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2625 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2626 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2627 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2628 2629 nsends = redund->nsends; 2630 nrecvs = redund->nrecvs; 2631 send_rank = redund->send_rank; 2632 recv_rank = redund->recv_rank; 2633 sbuf_nz = redund->sbuf_nz; 2634 rbuf_nz = redund->rbuf_nz; 2635 sbuf_j = redund->sbuf_j; 2636 sbuf_a = redund->sbuf_a; 2637 rbuf_j = redund->rbuf_j; 2638 rbuf_a = redund->rbuf_a; 2639 } 2640 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 PetscMPIInt subrank,subsize; 2643 PetscInt nleftover,np_subcomm; 2644 /* get the destination processors' id send_rank, nsends and nrecvs */ 2645 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2646 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2647 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2648 np_subcomm = size/nsubcomm; 2649 nleftover = size - nsubcomm*np_subcomm; 2650 nsends = 0; nrecvs = 0; 2651 for (i=0; i<size; i++){ /* i=rank*/ 2652 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2653 send_rank[nsends] = i; nsends++; 2654 recv_rank[nrecvs++] = i; 2655 } 2656 } 2657 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2658 i = size-nleftover-1; 2659 j = 0; 2660 while (j < nsubcomm - nleftover){ 2661 send_rank[nsends++] = i; 2662 i--; j++; 2663 } 2664 } 2665 2666 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2667 for (i=0; i<nleftover; i++){ 2668 recv_rank[nrecvs++] = size-nleftover+i; 2669 } 2670 } 2671 2672 /* allocate sbuf_j, sbuf_a */ 2673 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2674 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2675 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2676 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2677 2678 /* copy mat's local entries into the buffers */ 2679 if (reuse == MAT_INITIAL_MATRIX){ 2680 rownz_max = 0; 2681 rptr = sbuf_j; 2682 cols = sbuf_j + rend-rstart + 1; 2683 vals = sbuf_a; 2684 rptr[0] = 0; 2685 for (i=0; i<rend-rstart; i++){ 2686 row = i + rstart; 2687 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2688 ncols = nzA + nzB; 2689 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2690 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2691 /* load the column indices for this row into cols */ 2692 lwrite = 0; 2693 for (l=0; l<nzB; l++) { 2694 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2695 vals[lwrite] = aworkB[l]; 2696 cols[lwrite++] = ctmp; 2697 } 2698 } 2699 for (l=0; l<nzA; l++){ 2700 vals[lwrite] = aworkA[l]; 2701 cols[lwrite++] = cstart + cworkA[l]; 2702 } 2703 for (l=0; l<nzB; l++) { 2704 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2705 vals[lwrite] = aworkB[l]; 2706 cols[lwrite++] = ctmp; 2707 } 2708 } 2709 vals += ncols; 2710 cols += ncols; 2711 rptr[i+1] = rptr[i] + ncols; 2712 if (rownz_max < ncols) rownz_max = ncols; 2713 } 2714 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2715 } else { /* only copy matrix values into sbuf_a */ 2716 rptr = sbuf_j; 2717 vals = sbuf_a; 2718 rptr[0] = 0; 2719 for (i=0; i<rend-rstart; i++){ 
2720 row = i + rstart; 2721 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2722 ncols = nzA + nzB; 2723 cworkB = b->j + b->i[i]; 2724 aworkA = a->a + a->i[i]; 2725 aworkB = b->a + b->i[i]; 2726 lwrite = 0; 2727 for (l=0; l<nzB; l++) { 2728 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2729 } 2730 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2731 for (l=0; l<nzB; l++) { 2732 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2733 } 2734 vals += ncols; 2735 rptr[i+1] = rptr[i] + ncols; 2736 } 2737 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2738 2739 /* send nzlocal to others, and recv other's nzlocal */ 2740 /*--------------------------------------------------*/ 2741 if (reuse == MAT_INITIAL_MATRIX){ 2742 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2743 s_waits2 = s_waits3 + nsends; 2744 s_waits1 = s_waits2 + nsends; 2745 r_waits1 = s_waits1 + nsends; 2746 r_waits2 = r_waits1 + nrecvs; 2747 r_waits3 = r_waits2 + nrecvs; 2748 } else { 2749 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2750 r_waits3 = s_waits3 + nsends; 2751 } 2752 2753 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2754 if (reuse == MAT_INITIAL_MATRIX){ 2755 /* get new tags to keep the communication clean */ 2756 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2757 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2758 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2759 2760 /* post receives of other's nzlocal */ 2761 for (i=0; i<nrecvs; i++){ 2762 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2763 } 2764 /* send nzlocal to others */ 2765 for (i=0; i<nsends; i++){ 2766 sbuf_nz[i] = nzlocal; 2767 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2768 } 2769 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2770 count = nrecvs; 2771 while (count) { 2772 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2773 recv_rank[imdex] = recv_status.MPI_SOURCE; 2774 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2775 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2776 2777 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2778 rbuf_nz[imdex] += i + 2; 2779 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2780 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2781 count--; 2782 } 2783 /* wait on sends of nzlocal */ 2784 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2785 /* send mat->i,j to others, and recv from other's */ 2786 /*------------------------------------------------*/ 2787 for (i=0; i<nsends; i++){ 2788 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2789 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2790 } 2791 /* wait on receives of mat->i,j */ 2792 /*------------------------------*/ 2793 count = nrecvs; 2794 while (count) { 2795 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2796 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != 
MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2797 count--; 2798 } 2799 /* wait on sends of mat->i,j */ 2800 /*---------------------------*/ 2801 if (nsends) { 2802 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2803 } 2804 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2805 2806 /* post receives, send and receive mat->a */ 2807 /*----------------------------------------*/ 2808 for (imdex=0; imdex<nrecvs; imdex++) { 2809 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2810 } 2811 for (i=0; i<nsends; i++){ 2812 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2813 } 2814 count = nrecvs; 2815 while (count) { 2816 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2817 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2818 count--; 2819 } 2820 if (nsends) { 2821 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2822 } 2823 2824 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2825 2826 /* create redundant matrix */ 2827 /*-------------------------*/ 2828 if (reuse == MAT_INITIAL_MATRIX){ 2829 /* compute rownz_max for preallocation */ 2830 for (imdex=0; imdex<nrecvs; imdex++){ 2831 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2832 rptr = rbuf_j[imdex]; 2833 for (i=0; i<j; i++){ 2834 ncols = rptr[i+1] - rptr[i]; 2835 if (rownz_max < ncols) rownz_max = ncols; 2836 } 2837 } 2838 2839 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2840 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2841 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2842 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2843 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2844 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2845 } else { 2846 C = *matredundant; 2847 } 2848 2849 /* insert local matrix entries */ 2850 rptr = sbuf_j; 2851 cols = sbuf_j + rend-rstart + 1; 2852 vals = sbuf_a; 2853 for (i=0; i<rend-rstart; i++){ 2854 row = i + rstart; 2855 ncols = rptr[i+1] - rptr[i]; 2856 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2857 vals += ncols; 2858 cols += ncols; 2859 } 2860 /* insert received matrix entries */ 2861 for (imdex=0; imdex<nrecvs; imdex++){ 2862 rstart = rowrange[recv_rank[imdex]]; 2863 rend = rowrange[recv_rank[imdex]+1]; 2864 rptr = rbuf_j[imdex]; 2865 cols = rbuf_j[imdex] + rend-rstart + 1; 2866 vals = rbuf_a[imdex]; 2867 for (i=0; i<rend-rstart; i++){ 2868 row = i + rstart; 2869 ncols = rptr[i+1] - rptr[i]; 2870 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2871 vals += ncols; 2872 cols += ncols; 2873 } 2874 } 2875 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2876 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2877 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2878 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2879 if (reuse == MAT_INITIAL_MATRIX) { 2880 PetscContainer container; 2881 *matredundant = C; 2882 /* create a supporting struct and attach it to C for reuse */ 2883 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2884 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2885 ierr 
= PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2886 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2887 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2888 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2889 2890 redund->nzlocal = nzlocal; 2891 redund->nsends = nsends; 2892 redund->nrecvs = nrecvs; 2893 redund->send_rank = send_rank; 2894 redund->recv_rank = recv_rank; 2895 redund->sbuf_nz = sbuf_nz; 2896 redund->rbuf_nz = rbuf_nz; 2897 redund->sbuf_j = sbuf_j; 2898 redund->sbuf_a = sbuf_a; 2899 redund->rbuf_j = rbuf_j; 2900 redund->rbuf_a = rbuf_a; 2901 2902 redund->Destroy = C->ops->destroy; 2903 C->ops->destroy = MatDestroy_MatRedundant; 2904 } 2905 PetscFunctionReturn(0); 2906 } 2907 2908 #undef __FUNCT__ 2909 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2910 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2911 { 2912 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2913 PetscErrorCode ierr; 2914 PetscInt i,*idxb = 0; 2915 PetscScalar *va,*vb; 2916 Vec vtmp; 2917 2918 PetscFunctionBegin; 2919 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2920 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2921 if (idx) { 2922 for (i=0; i<A->rmap->n; i++) { 2923 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2924 } 2925 } 2926 2927 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2928 if (idx) { 2929 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2930 } 2931 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2932 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2933 2934 for (i=0; i<A->rmap->n; i++){ 2935 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2936 va[i] = vb[i]; 2937 if (idx) idx[i] = a->garray[idxb[i]]; 2938 } 2939 } 2940 2941 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2942 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2943 ierr = PetscFree(idxb);CHKERRQ(ierr); 2944 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2945 PetscFunctionReturn(0); 2946 } 2947 2948 #undef __FUNCT__ 2949 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2950 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2951 { 2952 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2953 PetscErrorCode ierr; 2954 PetscInt i,*idxb = 0; 2955 PetscScalar *va,*vb; 2956 Vec vtmp; 2957 2958 PetscFunctionBegin; 2959 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2960 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2961 if (idx) { 2962 for (i=0; i<A->cmap->n; i++) { 2963 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2964 } 2965 } 2966 2967 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2968 if (idx) { 2969 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2970 } 2971 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2972 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2973 2974 for (i=0; i<A->rmap->n; i++){ 2975 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2976 va[i] = vb[i]; 2977 if (idx) idx[i] = a->garray[idxb[i]]; 2978 } 2979 } 2980 2981 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2982 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2983 ierr = PetscFree(idxb);CHKERRQ(ierr); 2984 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2985 PetscFunctionReturn(0); 2986 } 2987 2988 #undef __FUNCT__ 2989 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2990 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2991 { 2992 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2993 PetscInt n = A->rmap->n; 2994 PetscInt cstart = 
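/*
     Usage sketch (minimal) for the row max/min routines in this file: v must conform
     to the row layout of A (MatGetVecs() provides such a vector) and idx, when not
     PETSC_NULL, receives for each local row the global column of the selected entry:

       Vec      v;
       PetscInt m,*idx;
       ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
       ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/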
A->cmap->rstart; 2995 PetscInt *cmap = mat->garray; 2996 PetscInt *diagIdx, *offdiagIdx; 2997 Vec diagV, offdiagV; 2998 PetscScalar *a, *diagA, *offdiagA; 2999 PetscInt r; 3000 PetscErrorCode ierr; 3001 3002 PetscFunctionBegin; 3003 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3004 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 3005 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 3006 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3007 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3008 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3009 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3010 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3011 for (r = 0; r < n; ++r) { 3012 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3013 a[r] = diagA[r]; 3014 idx[r] = cstart + diagIdx[r]; 3015 } else { 3016 a[r] = offdiagA[r]; 3017 idx[r] = cmap[offdiagIdx[r]]; 3018 } 3019 } 3020 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3021 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3022 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3023 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3024 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3025 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3026 PetscFunctionReturn(0); 3027 } 3028 3029 #undef __FUNCT__ 3030 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3031 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3032 { 3033 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3034 PetscInt n = A->rmap->n; 3035 PetscInt cstart = A->cmap->rstart; 3036 PetscInt *cmap = mat->garray; 3037 PetscInt *diagIdx, *offdiagIdx; 3038 Vec diagV, offdiagV; 3039 PetscScalar *a, *diagA, *offdiagA; 3040 PetscInt r; 3041 PetscErrorCode ierr; 3042 3043 PetscFunctionBegin; 3044 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3045 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3046 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3047 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3048 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3049 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3050 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3051 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3052 for (r = 0; r < n; ++r) { 3053 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3054 a[r] = diagA[r]; 3055 idx[r] = cstart + diagIdx[r]; 3056 } else { 3057 a[r] = offdiagA[r]; 3058 idx[r] = cmap[offdiagIdx[r]]; 3059 } 3060 } 3061 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3062 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3063 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3064 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3065 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3066 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3072 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3073 { 3074 PetscErrorCode ierr; 3075 Mat *dummy; 3076 3077 PetscFunctionBegin; 3078 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3079 *newmat = *dummy; 3080 ierr = PetscFree(dummy);CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 3084 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3085 3086 #undef __FUNCT__ 3087 #define 
__FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3088 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3089 { 3090 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3091 PetscErrorCode ierr; 3092 3093 PetscFunctionBegin; 3094 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3100 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3101 { 3102 PetscErrorCode ierr; 3103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3104 3105 PetscFunctionBegin; 3106 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3107 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3108 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3110 PetscFunctionReturn(0); 3111 } 3112 3113 /* -------------------------------------------------------------------*/ 3114 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3115 MatGetRow_MPIAIJ, 3116 MatRestoreRow_MPIAIJ, 3117 MatMult_MPIAIJ, 3118 /* 4*/ MatMultAdd_MPIAIJ, 3119 MatMultTranspose_MPIAIJ, 3120 MatMultTransposeAdd_MPIAIJ, 3121 #ifdef PETSC_HAVE_PBGL 3122 MatSolve_MPIAIJ, 3123 #else 3124 0, 3125 #endif 3126 0, 3127 0, 3128 /*10*/ 0, 3129 0, 3130 0, 3131 MatSOR_MPIAIJ, 3132 MatTranspose_MPIAIJ, 3133 /*15*/ MatGetInfo_MPIAIJ, 3134 MatEqual_MPIAIJ, 3135 MatGetDiagonal_MPIAIJ, 3136 MatDiagonalScale_MPIAIJ, 3137 MatNorm_MPIAIJ, 3138 /*20*/ MatAssemblyBegin_MPIAIJ, 3139 MatAssemblyEnd_MPIAIJ, 3140 MatSetOption_MPIAIJ, 3141 MatZeroEntries_MPIAIJ, 3142 /*24*/ MatZeroRows_MPIAIJ, 3143 0, 3144 #ifdef PETSC_HAVE_PBGL 3145 0, 3146 #else 3147 0, 3148 #endif 3149 0, 3150 0, 3151 /*29*/ MatSetUp_MPIAIJ, 3152 #ifdef PETSC_HAVE_PBGL 3153 0, 3154 #else 3155 0, 3156 #endif 3157 0, 3158 0, 3159 0, 3160 /*34*/ MatDuplicate_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*39*/ MatAXPY_MPIAIJ, 3166 MatGetSubMatrices_MPIAIJ, 3167 MatIncreaseOverlap_MPIAIJ, 3168 MatGetValues_MPIAIJ, 3169 MatCopy_MPIAIJ, 3170 /*44*/ MatGetRowMax_MPIAIJ, 3171 MatScale_MPIAIJ, 3172 0, 3173 0, 3174 MatZeroRowsColumns_MPIAIJ, 3175 /*49*/ MatSetRandom_MPIAIJ, 3176 0, 3177 0, 3178 0, 3179 0, 3180 /*54*/ MatFDColoringCreate_MPIAIJ, 3181 0, 3182 MatSetUnfactored_MPIAIJ, 3183 MatPermute_MPIAIJ, 3184 0, 3185 /*59*/ MatGetSubMatrix_MPIAIJ, 3186 MatDestroy_MPIAIJ, 3187 MatView_MPIAIJ, 3188 0, 3189 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3190 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3191 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3192 0, 3193 0, 3194 0, 3195 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3196 MatGetRowMinAbs_MPIAIJ, 3197 0, 3198 MatSetColoring_MPIAIJ, 3199 0, 3200 MatSetValuesAdifor_MPIAIJ, 3201 /*75*/ MatFDColoringApply_AIJ, 3202 0, 3203 0, 3204 0, 3205 MatFindZeroDiagonals_MPIAIJ, 3206 /*80*/ 0, 3207 0, 3208 0, 3209 /*83*/ MatLoad_MPIAIJ, 3210 0, 3211 0, 3212 0, 3213 0, 3214 0, 3215 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3216 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3217 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3218 MatPtAP_MPIAIJ_MPIAIJ, 3219 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3220 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3221 0, 3222 0, 3223 0, 3224 0, 3225 /*99*/ 0, 3226 0, 3227 0, 3228 MatConjugate_MPIAIJ, 3229 0, 3230 /*104*/MatSetValuesRow_MPIAIJ, 3231 MatRealPart_MPIAIJ, 3232 MatImaginaryPart_MPIAIJ, 3233 0, 3234 0, 3235 /*109*/0, 3236 MatGetRedundantMatrix_MPIAIJ, 3237 MatGetRowMin_MPIAIJ, 3238 0, 3239 0, 3240 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3241 0, 3242 0, 3243 0, 3244 0, 3245 /*119*/0, 3246 0, 3247 0, 3248 0, 3249 
MatGetMultiProcBlock_MPIAIJ, 3250 /*124*/MatFindNonzeroRows_MPIAIJ, 3251 MatGetColumnNorms_MPIAIJ, 3252 MatInvertBlockDiagonal_MPIAIJ, 3253 0, 3254 MatGetSubMatricesParallel_MPIAIJ, 3255 /*129*/0, 3256 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3257 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3258 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3259 0, 3260 /*134*/0, 3261 0, 3262 0, 3263 0, 3264 0 3265 }; 3266 3267 /* ----------------------------------------------------------------------------------------*/ 3268 3269 EXTERN_C_BEGIN 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3272 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3273 { 3274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3275 PetscErrorCode ierr; 3276 3277 PetscFunctionBegin; 3278 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3279 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3280 PetscFunctionReturn(0); 3281 } 3282 EXTERN_C_END 3283 3284 EXTERN_C_BEGIN 3285 #undef __FUNCT__ 3286 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3287 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3288 { 3289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3290 PetscErrorCode ierr; 3291 3292 PetscFunctionBegin; 3293 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3294 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3295 PetscFunctionReturn(0); 3296 } 3297 EXTERN_C_END 3298 3299 EXTERN_C_BEGIN 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3302 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3303 { 3304 Mat_MPIAIJ *b; 3305 PetscErrorCode ierr; 3306 PetscInt i; 3307 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3308 3309 PetscFunctionBegin; 3310 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3311 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3312 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3313 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3314 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3315 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3316 3317 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3318 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3319 if (d_nnz) { 3320 for (i=0; i<B->rmap->n; i++) { 3321 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3322 } 3323 } 3324 if (o_nnz) { 3325 for (i=0; i<B->rmap->n; i++) { 3326 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3327 } 3328 } 3329 b = (Mat_MPIAIJ*)B->data; 3330 3331 if (!B->preallocated) { 3332 /* Explicitly create 2 MATSEQAIJ matrices. 
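       b->A holds the diagonal block (local rows by local columns) and b->B holds the
       off-diagonal block; the MatMPIAIJSetPreallocation() man page below describes how
       the two blocks are defined.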
*/ 3333 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3334 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3335 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3336 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3337 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3338 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3339 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3340 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3341 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3342 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3343 } 3344 3345 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3346 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3347 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3348 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3349 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3350 B->preallocated = PETSC_TRUE; 3351 PetscFunctionReturn(0); 3352 } 3353 EXTERN_C_END 3354 3355 #undef __FUNCT__ 3356 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3357 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3358 { 3359 Mat mat; 3360 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3361 PetscErrorCode ierr; 3362 3363 PetscFunctionBegin; 3364 *newmat = 0; 3365 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3366 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3367 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3368 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3369 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3370 a = (Mat_MPIAIJ*)mat->data; 3371 3372 mat->factortype = matin->factortype; 3373 mat->rmap->bs = matin->rmap->bs; 3374 mat->cmap->bs = matin->cmap->bs; 3375 mat->assembled = PETSC_TRUE; 3376 mat->insertmode = NOT_SET_VALUES; 3377 mat->preallocated = PETSC_TRUE; 3378 3379 a->size = oldmat->size; 3380 a->rank = oldmat->rank; 3381 a->donotstash = oldmat->donotstash; 3382 a->roworiented = oldmat->roworiented; 3383 a->rowindices = 0; 3384 a->rowvalues = 0; 3385 a->getrowactive = PETSC_FALSE; 3386 3387 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3388 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3389 3390 if (oldmat->colmap) { 3391 #if defined (PETSC_USE_CTABLE) 3392 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3393 #else 3394 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3395 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3396 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3397 #endif 3398 } else a->colmap = 0; 3399 if (oldmat->garray) { 3400 PetscInt len; 3401 len = oldmat->B->cmap->n; 3402 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3403 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3404 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3405 } else a->garray = 0; 3406 3407 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3408 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3409 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3410 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3411 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3412 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3413 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3414 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3415 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3416 *newmat = mat; 3417 PetscFunctionReturn(0); 3418 } 3419 3420 3421 3422 #undef __FUNCT__ 3423 #define __FUNCT__ "MatLoad_MPIAIJ" 3424 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3425 { 3426 PetscScalar *vals,*svals; 3427 MPI_Comm comm = ((PetscObject)viewer)->comm; 3428 PetscErrorCode ierr; 3429 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3430 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3431 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3432 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3433 PetscInt cend,cstart,n,*rowners,sizesset=1; 3434 int fd; 3435 PetscInt bs = 1; 3436 3437 PetscFunctionBegin; 3438 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3439 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3440 if (!rank) { 3441 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3442 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3443 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3444 } 3445 3446 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3447 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3448 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3449 3450 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3451 3452 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3453 M = header[1]; N = header[2]; 3454 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3455 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3456 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3457 3458 /* If global sizes are set, check if they are consistent with that given in the file */ 3459 if (sizesset) { 3460 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3461 } 3462 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3463 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3464 3465 /* determine ownership of all (block) rows */ 3466 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3467 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3468 else m = newMat->rmap->n; /* Set by user */ 3469 3470 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3471 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3472 3473 /* First process needs enough room for process with most rows */ 3474 if (!rank) { 3475 mmax = rowners[1]; 3476 for (i=2; i<=size; i++) { 3477 
mmax = PetscMax(mmax, rowners[i]); 3478 } 3479 } else mmax = m; 3480 3481 rowners[0] = 0; 3482 for (i=2; i<=size; i++) { 3483 rowners[i] += rowners[i-1]; 3484 } 3485 rstart = rowners[rank]; 3486 rend = rowners[rank+1]; 3487 3488 /* distribute row lengths to all processors */ 3489 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3490 if (!rank) { 3491 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3492 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3493 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3494 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3495 for (j=0; j<m; j++) { 3496 procsnz[0] += ourlens[j]; 3497 } 3498 for (i=1; i<size; i++) { 3499 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3500 /* calculate the number of nonzeros on each processor */ 3501 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3502 procsnz[i] += rowlengths[j]; 3503 } 3504 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3505 } 3506 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3507 } else { 3508 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3509 } 3510 3511 if (!rank) { 3512 /* determine max buffer needed and allocate it */ 3513 maxnz = 0; 3514 for (i=0; i<size; i++) { 3515 maxnz = PetscMax(maxnz,procsnz[i]); 3516 } 3517 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3518 3519 /* read in my part of the matrix column indices */ 3520 nz = procsnz[0]; 3521 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3522 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3523 3524 /* read in every one elses and ship off */ 3525 for (i=1; i<size; i++) { 3526 nz = procsnz[i]; 3527 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3528 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3529 } 3530 ierr = PetscFree(cols);CHKERRQ(ierr); 3531 } else { 3532 /* determine buffer space needed for message */ 3533 nz = 0; 3534 for (i=0; i<m; i++) { 3535 nz += ourlens[i]; 3536 } 3537 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3538 3539 /* receive message of column indices*/ 3540 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3541 } 3542 3543 /* determine column ownership if matrix is not square */ 3544 if (N != M) { 3545 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3546 else n = newMat->cmap->n; 3547 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3548 cstart = cend - n; 3549 } else { 3550 cstart = rstart; 3551 cend = rend; 3552 n = cend - cstart; 3553 } 3554 3555 /* loop over local rows, determining number of off diagonal entries */ 3556 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3557 jj = 0; 3558 for (i=0; i<m; i++) { 3559 for (j=0; j<ourlens[i]; j++) { 3560 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3561 jj++; 3562 } 3563 } 3564 3565 for (i=0; i<m; i++) { 3566 ourlens[i] -= offlens[i]; 3567 } 3568 if (!sizesset) { 3569 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3570 } 3571 3572 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3573 3574 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3575 3576 for (i=0; i<m; i++) { 3577 ourlens[i] += offlens[i]; 3578 } 3579 3580 if (!rank) { 3581 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3582 3583 /* read in my part of the matrix numerical values */ 
3584 nz = procsnz[0]; 3585 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3586 3587 /* insert into matrix */ 3588 jj = rstart; 3589 smycols = mycols; 3590 svals = vals; 3591 for (i=0; i<m; i++) { 3592 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3593 smycols += ourlens[i]; 3594 svals += ourlens[i]; 3595 jj++; 3596 } 3597 3598 /* read in other processors and ship out */ 3599 for (i=1; i<size; i++) { 3600 nz = procsnz[i]; 3601 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3602 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3603 } 3604 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3605 } else { 3606 /* receive numeric values */ 3607 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3608 3609 /* receive message of values*/ 3610 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3611 3612 /* insert into matrix */ 3613 jj = rstart; 3614 smycols = mycols; 3615 svals = vals; 3616 for (i=0; i<m; i++) { 3617 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3618 smycols += ourlens[i]; 3619 svals += ourlens[i]; 3620 jj++; 3621 } 3622 } 3623 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3624 ierr = PetscFree(vals);CHKERRQ(ierr); 3625 ierr = PetscFree(mycols);CHKERRQ(ierr); 3626 ierr = PetscFree(rowners);CHKERRQ(ierr); 3627 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3628 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3629 3630 PetscFunctionReturn(0); 3631 } 3632 3633 #undef __FUNCT__ 3634 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3635 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3636 { 3637 PetscErrorCode ierr; 3638 IS iscol_local; 3639 PetscInt csize; 3640 3641 PetscFunctionBegin; 3642 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3643 if (call == MAT_REUSE_MATRIX) { 3644 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3645 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3646 } else { 3647 PetscInt cbs; 3648 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3649 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3650 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3651 } 3652 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3653 if (call == MAT_INITIAL_MATRIX) { 3654 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3655 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3656 } 3657 PetscFunctionReturn(0); 3658 } 3659 3660 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3661 #undef __FUNCT__ 3662 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3663 /* 3664 Not great since it makes two copies of the submatrix, first an SeqAIJ 3665 in local and then by concatenating the local matrices the end result. 3666 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3667 3668 Note: This requires a sequential iscol with all indices. 
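      (MatGetSubMatrix_MPIAIJ() above builds such a sequential index set by calling
      ISAllGather() on the parallel iscol before invoking this routine.)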
3669 */ 3670 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3671 { 3672 PetscErrorCode ierr; 3673 PetscMPIInt rank,size; 3674 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3675 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3676 PetscBool allcolumns, colflag; 3677 Mat M,Mreuse; 3678 MatScalar *vwork,*aa; 3679 MPI_Comm comm = ((PetscObject)mat)->comm; 3680 Mat_SeqAIJ *aij; 3681 3682 3683 PetscFunctionBegin; 3684 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3685 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3686 3687 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3688 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3689 if (colflag && ncol == mat->cmap->N){ 3690 allcolumns = PETSC_TRUE; 3691 } else { 3692 allcolumns = PETSC_FALSE; 3693 } 3694 if (call == MAT_REUSE_MATRIX) { 3695 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3696 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3697 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3698 } else { 3699 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3700 } 3701 3702 /* 3703 m - number of local rows 3704 n - number of columns (same on all processors) 3705 rstart - first row in new global matrix generated 3706 */ 3707 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3708 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3709 if (call == MAT_INITIAL_MATRIX) { 3710 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3711 ii = aij->i; 3712 jj = aij->j; 3713 3714 /* 3715 Determine the number of non-zeros in the diagonal and off-diagonal 3716 portions of the matrix in order to do correct preallocation 3717 */ 3718 3719 /* first get start and end of "diagonal" columns */ 3720 if (csize == PETSC_DECIDE) { 3721 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3722 if (mglobal == n) { /* square matrix */ 3723 nlocal = m; 3724 } else { 3725 nlocal = n/size + ((n % size) > rank); 3726 } 3727 } else { 3728 nlocal = csize; 3729 } 3730 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3731 rstart = rend - nlocal; 3732 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3733 3734 /* next, compute all the lengths */ 3735 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3736 olens = dlens + m; 3737 for (i=0; i<m; i++) { 3738 jend = ii[i+1] - ii[i]; 3739 olen = 0; 3740 dlen = 0; 3741 for (j=0; j<jend; j++) { 3742 if (*jj < rstart || *jj >= rend) olen++; 3743 else dlen++; 3744 jj++; 3745 } 3746 olens[i] = olen; 3747 dlens[i] = dlen; 3748 } 3749 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3750 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3751 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3752 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3753 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3754 ierr = PetscFree(dlens);CHKERRQ(ierr); 3755 } else { 3756 PetscInt ml,nl; 3757 3758 M = *newmat; 3759 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3760 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3761 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3762 /* 3763 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3764 rather than the slower MatSetValues(). 3765 */ 3766 M->was_assembled = PETSC_TRUE; 3767 M->assembled = PETSC_FALSE; 3768 } 3769 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3770 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3771 ii = aij->i; 3772 jj = aij->j; 3773 aa = aij->a; 3774 for (i=0; i<m; i++) { 3775 row = rstart + i; 3776 nz = ii[i+1] - ii[i]; 3777 cwork = jj; jj += nz; 3778 vwork = aa; aa += nz; 3779 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3780 } 3781 3782 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3783 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3784 *newmat = M; 3785 3786 /* save submatrix used in processor for next request */ 3787 if (call == MAT_INITIAL_MATRIX) { 3788 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3789 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3790 } 3791 3792 PetscFunctionReturn(0); 3793 } 3794 3795 EXTERN_C_BEGIN 3796 #undef __FUNCT__ 3797 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3798 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3799 { 3800 PetscInt m,cstart, cend,j,nnz,i,d; 3801 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3802 const PetscInt *JJ; 3803 PetscScalar *values; 3804 PetscErrorCode ierr; 3805 3806 PetscFunctionBegin; 3807 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3808 3809 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3810 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3811 m = B->rmap->n; 3812 cstart = B->cmap->rstart; 3813 cend = B->cmap->rend; 3814 rstart = B->rmap->rstart; 3815 3816 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3817 3818 #if defined(PETSC_USE_DEBUG) 3819 for (i=0; i<m; i++) { 3820 nnz = Ii[i+1]- Ii[i]; 3821 JJ = J + Ii[i]; 3822 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3823 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3824 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3825 } 3826 #endif 3827 3828 for (i=0; i<m; i++) { 3829 nnz = Ii[i+1]- Ii[i]; 3830 JJ = J + Ii[i]; 3831 nnz_max = PetscMax(nnz_max,nnz); 3832 d = 0; 3833 for (j=0; j<nnz; j++) { 3834 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3835 } 3836 d_nnz[i] = d; 3837 o_nnz[i] = nnz - d; 3838 } 3839 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3840 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3841 3842 if (v) values = (PetscScalar*)v; 3843 else { 3844 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3845 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3846 } 3847 3848 for (i=0; i<m; i++) { 3849 ii = i + rstart; 3850 nnz = Ii[i+1]- Ii[i]; 3851 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3852 } 3853 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3854 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3855 3856 if (!v) { 3857 ierr = PetscFree(values);CHKERRQ(ierr); 3858 } 3859 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3860 PetscFunctionReturn(0); 3861 } 3862 EXTERN_C_END 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3866 /*@ 3867 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3868 (the default parallel PETSc format). 3869 3870 Collective on MPI_Comm 3871 3872 Input Parameters: 3873 + B - the matrix 3874 . i - the indices into j for the start of each local row (starts with zero) 3875 . j - the column indices for each local row (starts with zero) 3876 - v - optional values in the matrix 3877 3878 Level: developer 3879 3880 Notes: 3881 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3882 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3883 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3884 3885 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3886 3887 The format which is used for the sparse matrix input, is equivalent to a 3888 row-major ordering.. i.e for the following matrix, the input data expected is 3889 as shown: 3890 3891 1 0 0 3892 2 0 3 P0 3893 ------- 3894 4 5 6 P1 3895 3896 Process0 [P0]: rows_owned=[0,1] 3897 i = {0,1,3} [size = nrow+1 = 2+1] 3898 j = {0,0,2} [size = nz = 6] 3899 v = {1,2,3} [size = nz = 6] 3900 3901 Process1 [P1]: rows_owned=[2] 3902 i = {0,3} [size = nrow+1 = 1+1] 3903 j = {0,1,2} [size = nz = 6] 3904 v = {4,5,6} [size = nz = 6] 3905 3906 .keywords: matrix, aij, compressed row, sparse, parallel 3907 3908 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3909 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3910 @*/ 3911 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3912 { 3913 PetscErrorCode ierr; 3914 3915 PetscFunctionBegin; 3916 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3917 PetscFunctionReturn(0); 3918 } 3919 3920 #undef __FUNCT__ 3921 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3922 /*@C 3923 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3924 (the default parallel PETSc format). For good matrix assembly performance 3925 the user should preallocate the matrix storage by setting the parameters 3926 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3927 performance can be increased by more than a factor of 50. 3928 3929 Collective on MPI_Comm 3930 3931 Input Parameters: 3932 + A - the matrix 3933 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3934 (same value is used for all local rows) 3935 . d_nnz - array containing the number of nonzeros in the various rows of the 3936 DIAGONAL portion of the local submatrix (possibly different for each row) 3937 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3938 The size of this array is equal to the number of local rows, i.e 'm'. 
3939 For matrices that will be factored, you must leave room for (and set) 3940 the diagonal entry even if it is zero. 3941 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3942 submatrix (same value is used for all local rows). 3943 - o_nnz - array containing the number of nonzeros in the various rows of the 3944 OFF-DIAGONAL portion of the local submatrix (possibly different for 3945 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3946 structure. The size of this array is equal to the number 3947 of local rows, i.e 'm'. 3948 3949 If the *_nnz parameter is given then the *_nz parameter is ignored 3950 3951 The AIJ format (also called the Yale sparse matrix format or 3952 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3953 storage. The stored row and column indices begin with zero. 3954 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3955 3956 The parallel matrix is partitioned such that the first m0 rows belong to 3957 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3958 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3959 3960 The DIAGONAL portion of the local submatrix of a processor can be defined 3961 as the submatrix which is obtained by extraction the part corresponding to 3962 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3963 first row that belongs to the processor, r2 is the last row belonging to 3964 the this processor, and c1-c2 is range of indices of the local part of a 3965 vector suitable for applying the matrix to. This is an mxn matrix. In the 3966 common case of a square matrix, the row and column ranges are the same and 3967 the DIAGONAL part is also square. The remaining portion of the local 3968 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3969 3970 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3971 3972 You can call MatGetInfo() to get information on how effective the preallocation was; 3973 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3974 You can also run with the option -info and look for messages with the string 3975 malloc in them to see if additional memory allocation was needed. 3976 3977 Example usage: 3978 3979 Consider the following 8x8 matrix with 34 non-zero values, that is 3980 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3981 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3982 as follows: 3983 3984 .vb 3985 1 2 0 | 0 3 0 | 0 4 3986 Proc0 0 5 6 | 7 0 0 | 8 0 3987 9 0 10 | 11 0 0 | 12 0 3988 ------------------------------------- 3989 13 0 14 | 15 16 17 | 0 0 3990 Proc1 0 18 0 | 19 20 21 | 0 0 3991 0 0 0 | 22 23 0 | 24 0 3992 ------------------------------------- 3993 Proc2 25 26 27 | 0 0 28 | 29 0 3994 30 0 0 | 31 32 33 | 0 34 3995 .ve 3996 3997 This can be represented as a collection of submatrices as: 3998 3999 .vb 4000 A B C 4001 D E F 4002 G H I 4003 .ve 4004 4005 Where the submatrices A,B,C are owned by proc0, D,E,F are 4006 owned by proc1, G,H,I are owned by proc2. 4007 4008 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4009 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4010 The 'M','N' parameters are 8,8, and have the same values on all procs. 4011 4012 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4013 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 4014 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4015 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4016 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4017 matrix, and [DF] as another SeqAIJ matrix. 4018 4019 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4020 allocated for every row of the local diagonal submatrix, and o_nz 4021 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4022 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4023 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4024 In this case, the values of d_nz,o_nz are: 4025 .vb 4026 proc0 : d_nz = 2, o_nz = 2 4027 proc1 : d_nz = 3, o_nz = 2 4028 proc2 : d_nz = 1, o_nz = 4 4029 .ve 4030 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4031 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4032 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4033 34 values. 4034 4035 When the d_nnz, o_nnz parameters are specified, the storage is specified 4036 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4037 In the above case the values for d_nnz,o_nnz are: 4038 .vb 4039 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4040 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4041 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4042 .ve 4043 Here the space allocated is the sum of all the above values, i.e. 34, and 4044 hence pre-allocation is perfect. 4045 4046 Level: intermediate 4047 4048 .keywords: matrix, aij, compressed row, sparse, parallel 4049 4050 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4051 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4052 @*/ 4053 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4054 { 4055 PetscErrorCode ierr; 4056 4057 PetscFunctionBegin; 4058 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4059 PetscValidType(B,1); 4060 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4061 PetscFunctionReturn(0); 4062 } 4063 4064 #undef __FUNCT__ 4065 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4066 /*@ 4067 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4068 CSR format. 4069 4070 Collective on MPI_Comm 4071 4072 Input Parameters: 4073 + comm - MPI communicator 4074 . m - number of local rows (cannot be PETSC_DECIDE) 4075 . n - This value should be the same as the local size used in creating the 4076 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4077 calculated if N is given) For square matrices n is almost always m. 4078 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4079 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4080 . i - row indices 4081 . j - column indices 4082 - a - matrix values 4083 4084 Output Parameter: 4085 . mat - the matrix 4086 4087 Level: intermediate 4088 4089 Notes: 4090 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4091 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4092 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
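       As an illustration (a sketch, not a complete program; it assumes comm is the communicator and
       A is the Mat to be created), the call made by process 0 for the two-process example laid out
       below would look like:

.vb
       PetscInt    i[] = {0,1,3};        /* row starts for the 2 locally owned rows      */
       PetscInt    j[] = {0,0,2};        /* global column indices of the local nonzeros  */
       PetscScalar v[] = {1.0,2.0,3.0};  /* the corresponding values                     */
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve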
4093 4094 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4095 4096 The format which is used for the sparse matrix input, is equivalent to a 4097 row-major ordering.. i.e for the following matrix, the input data expected is 4098 as shown: 4099 4100 1 0 0 4101 2 0 3 P0 4102 ------- 4103 4 5 6 P1 4104 4105 Process0 [P0]: rows_owned=[0,1] 4106 i = {0,1,3} [size = nrow+1 = 2+1] 4107 j = {0,0,2} [size = nz = 6] 4108 v = {1,2,3} [size = nz = 6] 4109 4110 Process1 [P1]: rows_owned=[2] 4111 i = {0,3} [size = nrow+1 = 1+1] 4112 j = {0,1,2} [size = nz = 6] 4113 v = {4,5,6} [size = nz = 6] 4114 4115 .keywords: matrix, aij, compressed row, sparse, parallel 4116 4117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4118 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4119 @*/ 4120 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4121 { 4122 PetscErrorCode ierr; 4123 4124 PetscFunctionBegin; 4125 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4126 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4127 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4128 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4129 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4130 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4131 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4132 PetscFunctionReturn(0); 4133 } 4134 4135 #undef __FUNCT__ 4136 #define __FUNCT__ "MatCreateAIJ" 4137 /*@C 4138 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4139 (the default parallel PETSc format). For good matrix assembly performance 4140 the user should preallocate the matrix storage by setting the parameters 4141 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4142 performance can be increased by more than a factor of 50. 4143 4144 Collective on MPI_Comm 4145 4146 Input Parameters: 4147 + comm - MPI communicator 4148 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4149 This value should be the same as the local size used in creating the 4150 y vector for the matrix-vector product y = Ax. 4151 . n - This value should be the same as the local size used in creating the 4152 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4153 calculated if N is given) For square matrices n is almost always m. 4154 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4155 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4156 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4157 (same value is used for all local rows) 4158 . d_nnz - array containing the number of nonzeros in the various rows of the 4159 DIAGONAL portion of the local submatrix (possibly different for each row) 4160 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4161 The size of this array is equal to the number of local rows, i.e 'm'. 4162 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4163 submatrix (same value is used for all local rows). 
4164 - o_nnz - array containing the number of nonzeros in the various rows of the 4165 OFF-DIAGONAL portion of the local submatrix (possibly different for 4166 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4167 structure. The size of this array is equal to the number 4168 of local rows, i.e 'm'. 4169 4170 Output Parameter: 4171 . A - the matrix 4172 4173 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4174 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4175 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4176 4177 Notes: 4178 If the *_nnz parameter is given then the *_nz parameter is ignored 4179 4180 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4181 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4182 storage requirements for this matrix. 4183 4184 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4185 processor than it must be used on all processors that share the object for 4186 that argument. 4187 4188 The user MUST specify either the local or global matrix dimensions 4189 (possibly both). 4190 4191 The parallel matrix is partitioned across processors such that the 4192 first m0 rows belong to process 0, the next m1 rows belong to 4193 process 1, the next m2 rows belong to process 2 etc.. where 4194 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4195 values corresponding to [m x N] submatrix. 4196 4197 The columns are logically partitioned with the n0 columns belonging 4198 to 0th partition, the next n1 columns belonging to the next 4199 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4200 4201 The DIAGONAL portion of the local submatrix on any given processor 4202 is the submatrix corresponding to the rows and columns m,n 4203 corresponding to the given processor. i.e diagonal matrix on 4204 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4205 etc. The remaining portion of the local submatrix [m x (N-n)] 4206 constitute the OFF-DIAGONAL portion. The example below better 4207 illustrates this concept. 4208 4209 For a square global matrix we define each processor's diagonal portion 4210 to be its local rows and the corresponding columns (a square submatrix); 4211 each processor's off-diagonal portion encompasses the remainder of the 4212 local matrix (a rectangular submatrix). 4213 4214 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4215 4216 When calling this routine with a single process communicator, a matrix of 4217 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4218 type of communicator, use the construction mechanism: 4219 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4220 4221 By default, this format uses inodes (identical nodes) when possible. 4222 We search for consecutive rows with the same nonzero structure, thereby 4223 reusing matrix information to achieve increased efficiency. 4224 4225 Options Database Keys: 4226 + -mat_no_inode - Do not use inodes 4227 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4228 - -mat_aij_oneindex - Internally use indexing starting at 1 4229 rather than 0. Note that when calling MatSetValues(), 4230 the user still MUST index entries starting at 0! 4231 4232 4233 Example usage: 4234 4235 Consider the following 8x8 matrix with 34 non-zero values, that is 4236 assembled across 3 processors. 
Let us assume that proc0 owns 3 rows, 4237 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4238 as follows: 4239 4240 .vb 4241 1 2 0 | 0 3 0 | 0 4 4242 Proc0 0 5 6 | 7 0 0 | 8 0 4243 9 0 10 | 11 0 0 | 12 0 4244 ------------------------------------- 4245 13 0 14 | 15 16 17 | 0 0 4246 Proc1 0 18 0 | 19 20 21 | 0 0 4247 0 0 0 | 22 23 0 | 24 0 4248 ------------------------------------- 4249 Proc2 25 26 27 | 0 0 28 | 29 0 4250 30 0 0 | 31 32 33 | 0 34 4251 .ve 4252 4253 This can be represented as a collection of submatrices as: 4254 4255 .vb 4256 A B C 4257 D E F 4258 G H I 4259 .ve 4260 4261 where the submatrices A,B,C are owned by proc0, D,E,F are 4262 owned by proc1, and G,H,I are owned by proc2. 4263 4264 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4265 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4266 The 'M','N' parameters are 8,8, and have the same values on all procs. 4267 4268 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4269 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4270 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4271 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4272 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4273 matrix, and [DF] as another SeqAIJ matrix. 4274 4275 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4276 allocated for every row of the local diagonal submatrix, and o_nz 4277 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4278 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4279 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4280 In this case, the values of d_nz,o_nz are: 4281 .vb 4282 proc0 : d_nz = 2, o_nz = 2 4283 proc1 : d_nz = 3, o_nz = 2 4284 proc2 : d_nz = 1, o_nz = 4 4285 .ve 4286 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4287 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4288 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4289 34 values. 4290 4291 When the d_nnz, o_nnz parameters are specified, the storage is specified 4292 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4293 In the above case the values for d_nnz,o_nnz are: 4294 .vb 4295 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4296 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4297 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4298 .ve 4299 Here the space allocated is the sum of all the above values, i.e. 34, and 4300 hence pre-allocation is perfect.
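   As an illustrative sketch (it assumes comm is the communicator, A and ierr are declared, and
   d_nnz/o_nnz are the per-process arrays listed above), the example matrix could be created on
   each process with

.vb
     ierr = MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve

   where m and n are 3,3,2 on proc0,proc1,proc2 as given above; since d_nnz and o_nnz are provided,
   the d_nz and o_nz arguments (here 0) are ignored.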
4301 4302 Level: intermediate 4303 4304 .keywords: matrix, aij, compressed row, sparse, parallel 4305 4306 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4307 MPIAIJ, MatCreateMPIAIJWithArrays() 4308 @*/ 4309 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4310 { 4311 PetscErrorCode ierr; 4312 PetscMPIInt size; 4313 4314 PetscFunctionBegin; 4315 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4316 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4317 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4318 if (size > 1) { 4319 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4320 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4321 } else { 4322 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4323 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4324 } 4325 PetscFunctionReturn(0); 4326 } 4327 4328 #undef __FUNCT__ 4329 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4330 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4331 { 4332 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4333 4334 PetscFunctionBegin; 4335 *Ad = a->A; 4336 *Ao = a->B; 4337 *colmap = a->garray; 4338 PetscFunctionReturn(0); 4339 } 4340 4341 #undef __FUNCT__ 4342 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4343 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4344 { 4345 PetscErrorCode ierr; 4346 PetscInt i; 4347 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4348 4349 PetscFunctionBegin; 4350 if (coloring->ctype == IS_COLORING_GLOBAL) { 4351 ISColoringValue *allcolors,*colors; 4352 ISColoring ocoloring; 4353 4354 /* set coloring for diagonal portion */ 4355 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4356 4357 /* set coloring for off-diagonal portion */ 4358 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4359 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4360 for (i=0; i<a->B->cmap->n; i++) { 4361 colors[i] = allcolors[a->garray[i]]; 4362 } 4363 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4364 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4365 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4366 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4367 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4368 ISColoringValue *colors; 4369 PetscInt *larray; 4370 ISColoring ocoloring; 4371 4372 /* set coloring for diagonal portion */ 4373 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4374 for (i=0; i<a->A->cmap->n; i++) { 4375 larray[i] = i + A->cmap->rstart; 4376 } 4377 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4378 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4379 for (i=0; i<a->A->cmap->n; i++) { 4380 colors[i] = coloring->colors[larray[i]]; 4381 } 4382 ierr = PetscFree(larray);CHKERRQ(ierr); 4383 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4384 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4385 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4386 4387 /* set coloring for off-diagonal portion */ 4388 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4389 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4390 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4391 for (i=0; i<a->B->cmap->n; i++) { 4392 colors[i] = coloring->colors[larray[i]]; 4393 } 4394 ierr = PetscFree(larray);CHKERRQ(ierr); 4395 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4396 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4397 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4398 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4399 4400 PetscFunctionReturn(0); 4401 } 4402 4403 #undef __FUNCT__ 4404 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4405 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4406 { 4407 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4408 PetscErrorCode ierr; 4409 4410 PetscFunctionBegin; 4411 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4412 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4413 PetscFunctionReturn(0); 4414 } 4415 4416 #undef __FUNCT__ 4417 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4418 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4419 { 4420 PetscErrorCode ierr; 4421 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4422 PetscInt *indx; 4423 4424 PetscFunctionBegin; 4425 /* This routine will ONLY return MPIAIJ type matrix */ 4426 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4427 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4428 if (n == PETSC_DECIDE){ 4429 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4430 } 4431 /* Check sum(n) = N */ 4432 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4433 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4434 4435 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4436 rstart -= m; 4437 4438 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4439 for (i=0;i<m;i++) { 4440 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4441 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4442 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4443 } 4444 4445 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4446 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4447 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4448 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4449 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4450 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4451 PetscFunctionReturn(0); 4452 } 4453 4454 #undef __FUNCT__ 4455 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4456 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4457 { 4458 PetscErrorCode ierr; 4459 PetscInt m,N,i,rstart,nnz,Ii; 4460 PetscInt *indx; 4461 PetscScalar *values; 4462 4463 PetscFunctionBegin; 4464 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4465 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4466 for (i=0;i<m;i++) { 4467 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4468 Ii = i + rstart; 4469 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4470 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4471 } 4472 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4479 /*@ 4480 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4481 matrices from each processor 4482 4483 Collective on MPI_Comm 4484 4485 Input Parameters: 4486 + comm - the communicators the parallel matrix will live on 4487 . inmat - the input sequential matrices 4488 . n - number of local columns (or PETSC_DECIDE) 4489 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4490 4491 Output Parameter: 4492 . outmat - the parallel matrix generated 4493 4494 Level: advanced 4495 4496 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4497 4498 @*/ 4499 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4500 { 4501 PetscErrorCode ierr; 4502 4503 PetscFunctionBegin; 4504 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4505 if (scall == MAT_INITIAL_MATRIX){ 4506 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4507 } 4508 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4509 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatFileSplit" 4515 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4516 { 4517 PetscErrorCode ierr; 4518 PetscMPIInt rank; 4519 PetscInt m,N,i,rstart,nnz; 4520 size_t len; 4521 const PetscInt *indx; 4522 PetscViewer out; 4523 char *name; 4524 Mat B; 4525 const PetscScalar *values; 4526 4527 PetscFunctionBegin; 4528 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4529 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4530 /* Should this be the type of the diagonal block of A? 
*/ 4531 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4532 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4533 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4534 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4535 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4536 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4537 for (i=0;i<m;i++) { 4538 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4539 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4540 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4541 } 4542 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4543 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4544 4545 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4546 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4547 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4548 sprintf(name,"%s.%d",outfile,rank); 4549 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4550 ierr = PetscFree(name);CHKERRQ(ierr); 4551 ierr = MatView(B,out);CHKERRQ(ierr); 4552 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4553 ierr = MatDestroy(&B);CHKERRQ(ierr); 4554 PetscFunctionReturn(0); 4555 } 4556 4557 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4558 #undef __FUNCT__ 4559 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4560 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4561 { 4562 PetscErrorCode ierr; 4563 Mat_Merge_SeqsToMPI *merge; 4564 PetscContainer container; 4565 4566 PetscFunctionBegin; 4567 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4568 if (container) { 4569 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4582 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4583 ierr = PetscFree(merge);CHKERRQ(ierr); 4584 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4585 } 4586 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4587 PetscFunctionReturn(0); 4588 } 4589 4590 #include <../src/mat/utils/freespace.h> 4591 #include <petscbt.h> 4592 4593 #undef __FUNCT__ 4594 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4595 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4596 { 4597 PetscErrorCode ierr; 4598 MPI_Comm comm=((PetscObject)mpimat)->comm; 4599 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4600 PetscMPIInt size,rank,taga,*len_s; 4601 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4602 PetscInt proc,m; 4603 PetscInt **buf_ri,**buf_rj; 4604 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4605 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4606 MPI_Request *s_waits,*r_waits; 4607 MPI_Status *status; 4608 MatScalar *aa=a->a; 4609 MatScalar **abuf_r,*ba_i; 4610 Mat_Merge_SeqsToMPI *merge; 4611 PetscContainer container; 4612 4613 PetscFunctionBegin; 4614 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4615 4616 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4617 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4618 4619 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4620 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4621 4622 bi = merge->bi; 4623 bj = merge->bj; 4624 buf_ri = merge->buf_ri; 4625 buf_rj = merge->buf_rj; 4626 4627 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4628 owners = merge->rowmap->range; 4629 len_s = merge->len_s; 4630 4631 /* send and recv matrix values */ 4632 /*-----------------------------*/ 4633 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4634 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4635 4636 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4637 for (proc=0,k=0; proc<size; proc++){ 4638 if (!len_s[proc]) continue; 4639 i = owners[proc]; 4640 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4641 k++; 4642 } 4643 4644 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4645 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4646 ierr = PetscFree(status);CHKERRQ(ierr); 4647 4648 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4649 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4650 4651 /* insert mat values of mpimat */ 4652 /*----------------------------*/ 4653 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4654 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4655 4656 for (k=0; k<merge->nrecv; k++){ 4657 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4658 nrows = *(buf_ri_k[k]); 4659 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4660 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4661 } 4662 4663 /* set values of ba */ 4664 m = merge->rowmap->n; 4665 for (i=0; i<m; i++) { 4666 arow = owners[rank] + i; 4667 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4668 bnzi = bi[i+1] - bi[i]; 4669 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4670 4671 /* add local non-zero vals of this proc's seqmat into ba */ 4672 anzi = ai[arow+1] - ai[arow]; 4673 aj = a->j + ai[arow]; 4674 aa = a->a + ai[arow]; 4675 nextaj = 0; 4676 for (j=0; nextaj<anzi; j++){ 4677 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4678 ba_i[j] += aa[nextaj++]; 4679 } 4680 } 4681 4682 /* add received vals into ba */ 4683 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4684 /* i-th row */ 4685 if (i == *nextrow[k]) { 4686 anzi = *(nextai[k]+1) - *nextai[k]; 4687 aj = buf_rj[k] + *(nextai[k]); 4688 aa = abuf_r[k] + *(nextai[k]); 4689 nextaj = 0; 4690 for (j=0; nextaj<anzi; j++){ 4691 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4692 ba_i[j] += aa[nextaj++]; 4693 } 4694 } 4695 nextrow[k]++; nextai[k]++; 4696 } 4697 } 4698 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4699 } 4700 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4701 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4702 4703 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4704 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4705 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4706 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4707 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4715 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4716 { 4717 PetscErrorCode ierr; 4718 Mat B_mpi; 4719 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4720 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4721 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4722 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4723 PetscInt len,proc,*dnz,*onz,bs,cbs; 4724 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4725 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4726 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4727 MPI_Status *status; 4728 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4729 PetscBT lnkbt; 4730 Mat_Merge_SeqsToMPI *merge; 4731 PetscContainer container; 4732 4733 PetscFunctionBegin; 4734 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4735 4736 /* make sure it is a PETSc comm */ 4737 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4738 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4739 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4740 4741 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4742 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4743 4744 /* determine row ownership */ 4745 /*---------------------------------------------------------*/ 4746 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4747 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4748 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4749 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4750 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4751 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4752 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4753 4754 m = merge->rowmap->n; 4755 owners = merge->rowmap->range; 4756 4757 /* determine the number of messages to send, their lengths */ 4758 /*---------------------------------------------------------*/ 4759 len_s = merge->len_s; 4760 4761 len = 0; /* length of buf_si[] */ 4762 merge->nsend = 0; 4763 for (proc=0; proc<size; proc++){ 4764 len_si[proc] = 0; 4765 if (proc == rank){ 4766 len_s[proc] = 0; 4767 } else { 4768 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4769 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4770 } 4771 if (len_s[proc]) { 4772 merge->nsend++; 4773 nrows = 0; 4774 for (i=owners[proc]; i<owners[proc+1]; i++){ 4775 if (ai[i+1] > ai[i]) nrows++; 4776 } 4777 len_si[proc] = 2*(nrows+1); 4778 len += len_si[proc]; 4779 } 4780 } 4781 4782 /* determine the number and length of messages to receive for ij-structure */ 4783 /*-------------------------------------------------------------------------*/ 4784 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4785 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4786 4787 /* post the Irecv of j-structure */ 4788 /*-------------------------------*/ 4789 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4790 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4791 4792 /* post the Isend of j-structure */ 4793 /*--------------------------------*/ 4794 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4795 4796 for (proc=0, k=0; proc<size; proc++){ 4797 if (!len_s[proc]) continue; 4798 i = owners[proc]; 4799 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4800 k++; 4801 } 4802 4803 /* receives and sends of j-structure are complete */ 4804 /*------------------------------------------------*/ 4805 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4806 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4807 4808 /* send and recv i-structure */ 4809 /*---------------------------*/ 4810 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4811 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4812 4813 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4814 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4815 for (proc=0,k=0; proc<size; proc++){ 4816 if (!len_s[proc]) continue; 4817 /* form outgoing message for i-structure: 4818 buf_si[0]: nrows to be sent 4819 [1:nrows]: row index (global) 4820 [nrows+1:2*nrows+1]: i-structure index 4821 */ 4822 /*-------------------------------------------*/ 4823 nrows = len_si[proc]/2 - 1; 4824 buf_si_i = buf_si + nrows+1; 4825 buf_si[0] = nrows; 4826 buf_si_i[0] = 0; 4827 nrows = 0; 4828 for (i=owners[proc]; i<owners[proc+1]; i++){ 4829 anzi = ai[i+1] - ai[i]; 4830 if (anzi) { 4831 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4832 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4833 nrows++; 4834 } 4835 } 4836 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4837 k++; 4838 buf_si += len_si[proc]; 4839 } 4840 4841 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4842 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4843 4844 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4845 for (i=0; i<merge->nrecv; i++){ 4846 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4847 } 4848 4849 ierr = PetscFree(len_si);CHKERRQ(ierr); 4850 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4851 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4852 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4853 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4854 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4855 ierr = PetscFree(status);CHKERRQ(ierr); 4856 4857 /* compute a local seq matrix in each processor */ 4858 /*----------------------------------------------*/ 4859 /* allocate bi array and free space for accumulating nonzero column info */ 4860 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4861 bi[0] = 0; 4862 4863 /* create and initialize a linked list */ 4864 nlnk = N+1; 4865 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4866 4867 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4868 len = ai[owners[rank+1]] - ai[owners[rank]]; 4869 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4870 current_space = free_space; 4871 4872 /* determine symbolic info for 
each local row */ 4873 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4874 4875 for (k=0; k<merge->nrecv; k++){ 4876 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4877 nrows = *buf_ri_k[k]; 4878 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4879 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4880 } 4881 4882 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4883 len = 0; 4884 for (i=0;i<m;i++) { 4885 bnzi = 0; 4886 /* add local non-zero cols of this proc's seqmat into lnk */ 4887 arow = owners[rank] + i; 4888 anzi = ai[arow+1] - ai[arow]; 4889 aj = a->j + ai[arow]; 4890 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4891 bnzi += nlnk; 4892 /* add received col data into lnk */ 4893 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4894 if (i == *nextrow[k]) { /* i-th row */ 4895 anzi = *(nextai[k]+1) - *nextai[k]; 4896 aj = buf_rj[k] + *nextai[k]; 4897 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4898 bnzi += nlnk; 4899 nextrow[k]++; nextai[k]++; 4900 } 4901 } 4902 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4903 4904 /* if free space is not available, make more free space */ 4905 if (current_space->local_remaining<bnzi) { 4906 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4907 nspacedouble++; 4908 } 4909 /* copy data into free space, then initialize lnk */ 4910 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4911 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4912 4913 current_space->array += bnzi; 4914 current_space->local_used += bnzi; 4915 current_space->local_remaining -= bnzi; 4916 4917 bi[i+1] = bi[i] + bnzi; 4918 } 4919 4920 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4921 4922 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4923 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4924 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4925 4926 /* create symbolic parallel matrix B_mpi */ 4927 /*---------------------------------------*/ 4928 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4929 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4930 if (n==PETSC_DECIDE) { 4931 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4932 } else { 4933 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4934 } 4935 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4936 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4937 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4938 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4939 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4940 4941 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4942 B_mpi->assembled = PETSC_FALSE; 4943 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4944 merge->bi = bi; 4945 merge->bj = bj; 4946 merge->buf_ri = buf_ri; 4947 merge->buf_rj = buf_rj; 4948 merge->coi = PETSC_NULL; 4949 merge->coj = PETSC_NULL; 4950 merge->owners_co = PETSC_NULL; 4951 4952 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4953 4954 /* attach the supporting struct to B_mpi for reuse */ 4955 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4956 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4957 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4958 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4959 *mpimat = B_mpi; 4960 4961 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 4965 #undef __FUNCT__ 4966 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4967 /*@C 4968 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4969 matrices from each processor 4970 4971 Collective on MPI_Comm 4972 4973 Input Parameters: 4974 + comm - the communicators the parallel matrix will live on 4975 . seqmat - the input sequential matrices 4976 . m - number of local rows (or PETSC_DECIDE) 4977 . n - number of local columns (or PETSC_DECIDE) 4978 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4979 4980 Output Parameter: 4981 . mpimat - the parallel matrix generated 4982 4983 Level: advanced 4984 4985 Notes: 4986 The dimensions of the sequential matrix in each processor MUST be the same. 4987 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4988 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4989 @*/ 4990 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4991 { 4992 PetscErrorCode ierr; 4993 PetscMPIInt size; 4994 4995 PetscFunctionBegin; 4996 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4997 if (size == 1){ 4998 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4999 if (scall == MAT_INITIAL_MATRIX){ 5000 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5001 } else { 5002 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5003 } 5004 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5005 PetscFunctionReturn(0); 5006 } 5007 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5008 if (scall == MAT_INITIAL_MATRIX){ 5009 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5010 } 5011 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5012 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5013 PetscFunctionReturn(0); 5014 } 5015 5016 #undef __FUNCT__ 5017 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5018 /*@ 5019 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5020 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5021 with MatGetSize() 5022 5023 Not Collective 5024 5025 Input Parameters: 5026 + A - the matrix 5027 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5028 5029 Output Parameter: 5030 . 
A_loc - the local sequential matrix generated 5031 5032 Level: developer 5033 5034 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5035 5036 @*/ 5037 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5038 { 5039 PetscErrorCode ierr; 5040 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5041 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5042 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5043 MatScalar *aa=a->a,*ba=b->a,*cam; 5044 PetscScalar *ca; 5045 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5046 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5047 PetscBool match; 5048 5049 PetscFunctionBegin; 5050 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5051 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5052 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5053 if (scall == MAT_INITIAL_MATRIX){ 5054 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5055 ci[0] = 0; 5056 for (i=0; i<am; i++){ 5057 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5058 } 5059 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5060 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5061 k = 0; 5062 for (i=0; i<am; i++) { 5063 ncols_o = bi[i+1] - bi[i]; 5064 ncols_d = ai[i+1] - ai[i]; 5065 /* off-diagonal portion of A */ 5066 for (jo=0; jo<ncols_o; jo++) { 5067 col = cmap[*bj]; 5068 if (col >= cstart) break; 5069 cj[k] = col; bj++; 5070 ca[k++] = *ba++; 5071 } 5072 /* diagonal portion of A */ 5073 for (j=0; j<ncols_d; j++) { 5074 cj[k] = cstart + *aj++; 5075 ca[k++] = *aa++; 5076 } 5077 /* off-diagonal portion of A */ 5078 for (j=jo; j<ncols_o; j++) { 5079 cj[k] = cmap[*bj++]; 5080 ca[k++] = *ba++; 5081 } 5082 } 5083 /* put together the new matrix */ 5084 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5085 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5086 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5087 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5088 mat->free_a = PETSC_TRUE; 5089 mat->free_ij = PETSC_TRUE; 5090 mat->nonew = 0; 5091 } else if (scall == MAT_REUSE_MATRIX){ 5092 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5093 ci = mat->i; cj = mat->j; cam = mat->a; 5094 for (i=0; i<am; i++) { 5095 /* off-diagonal portion of A */ 5096 ncols_o = bi[i+1] - bi[i]; 5097 for (jo=0; jo<ncols_o; jo++) { 5098 col = cmap[*bj]; 5099 if (col >= cstart) break; 5100 *cam++ = *ba++; bj++; 5101 } 5102 /* diagonal portion of A */ 5103 ncols_d = ai[i+1] - ai[i]; 5104 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5105 /* off-diagonal portion of A */ 5106 for (j=jo; j<ncols_o; j++) { 5107 *cam++ = *ba++; bj++; 5108 } 5109 } 5110 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5111 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5112 PetscFunctionReturn(0); 5113 } 5114 5115 #undef __FUNCT__ 5116 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5117 /*@C 5118 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5119 5120 Not Collective 5121 5122 Input Parameters: 5123 + A - the matrix 5124 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5125 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5126 5127 Output Parameter: 5128 . 
A_loc - the local sequential matrix generated 5129 5130 Level: developer 5131 5132 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5133 5134 @*/ 5135 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5136 { 5137 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5138 PetscErrorCode ierr; 5139 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5140 IS isrowa,iscola; 5141 Mat *aloc; 5142 PetscBool match; 5143 5144 PetscFunctionBegin; 5145 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5146 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5147 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5148 if (!row){ 5149 start = A->rmap->rstart; end = A->rmap->rend; 5150 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5151 } else { 5152 isrowa = *row; 5153 } 5154 if (!col){ 5155 start = A->cmap->rstart; 5156 cmap = a->garray; 5157 nzA = a->A->cmap->n; 5158 nzB = a->B->cmap->n; 5159 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5160 ncols = 0; 5161 for (i=0; i<nzB; i++) { 5162 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5163 else break; 5164 } 5165 imark = i; 5166 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5167 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5168 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5169 } else { 5170 iscola = *col; 5171 } 5172 if (scall != MAT_INITIAL_MATRIX){ 5173 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5174 aloc[0] = *A_loc; 5175 } 5176 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5177 *A_loc = aloc[0]; 5178 ierr = PetscFree(aloc);CHKERRQ(ierr); 5179 if (!row){ 5180 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5181 } 5182 if (!col){ 5183 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5184 } 5185 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5186 PetscFunctionReturn(0); 5187 } 5188 5189 #undef __FUNCT__ 5190 #define __FUNCT__ "MatGetBrowsOfAcols" 5191 /*@C 5192 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5193 5194 Collective on Mat 5195 5196 Input Parameters: 5197 + A,B - the matrices in mpiaij format 5198 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5199 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5200 5201 Output Parameter: 5202 + rowb, colb - index sets of rows and columns of B to extract 5203 - B_seq - the sequential matrix generated 5204 5205 Level: developer 5206 5207 @*/ 5208 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5209 { 5210 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5211 PetscErrorCode ierr; 5212 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5213 IS isrowb,iscolb; 5214 Mat *bseq=PETSC_NULL; 5215 5216 PetscFunctionBegin; 5217 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5218 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5219 } 5220 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5221 5222 if (scall == MAT_INITIAL_MATRIX){ 5223 start = A->cmap->rstart; 5224 cmap = a->garray; 5225 nzA = a->A->cmap->n; 5226 nzB = a->B->cmap->n; 5227 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5228 ncols = 0; 5229 for (i=0; i<nzB; i++) { /* row < local row index */ 5230 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5231 else break; 5232 } 5233 imark = i; 5234 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5235 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5236 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5237 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5238 } else { 5239 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5240 isrowb = *rowb; iscolb = *colb; 5241 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5242 bseq[0] = *B_seq; 5243 } 5244 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5245 *B_seq = bseq[0]; 5246 ierr = PetscFree(bseq);CHKERRQ(ierr); 5247 if (!rowb){ 5248 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5249 } else { 5250 *rowb = isrowb; 5251 } 5252 if (!colb){ 5253 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5254 } else { 5255 *colb = iscolb; 5256 } 5257 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5258 PetscFunctionReturn(0); 5259 } 5260 5261 #undef __FUNCT__ 5262 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5263 /* 5264 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5265 of the OFF-DIAGONAL portion of local A 5266 5267 Collective on Mat 5268 5269 Input Parameters: 5270 + A,B - the matrices in mpiaij format 5271 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5272 5273 Output Parameter: 5274 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5275 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5276 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5277 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5278 5279 Level: developer 5280 5281 */ 5282 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5283 { 5284 VecScatter_MPI_General *gen_to,*gen_from; 5285 PetscErrorCode ierr; 5286 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5287 Mat_SeqAIJ *b_oth; 5288 VecScatter ctx=a->Mvctx; 5289 MPI_Comm comm=((PetscObject)ctx)->comm; 5290 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5291 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5292 PetscScalar *rvalues,*svalues; 5293 MatScalar *b_otha,*bufa,*bufA; 5294 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5295 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5296 MPI_Status *sstatus,rstatus; 5297 PetscMPIInt jj; 5298 PetscInt *cols,sbs,rbs; 5299 PetscScalar *vals; 5300 5301 PetscFunctionBegin; 5302 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5303 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5304 } 5305 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5306 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5307 5308 gen_to = (VecScatter_MPI_General*)ctx->todata; 5309 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5310 rvalues = gen_from->values; /* holds the length of receiving row */ 5311 svalues = gen_to->values; /* holds the length of sending row */ 5312 nrecvs = gen_from->n; 5313 nsends = gen_to->n; 5314 5315 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5316 srow = gen_to->indices; /* local row index to be sent */ 5317 sstarts = gen_to->starts; 5318 sprocs = gen_to->procs; 5319 sstatus = gen_to->sstatus; 5320 sbs = gen_to->bs; 5321 rstarts = gen_from->starts; 5322 rprocs = gen_from->procs; 5323 rbs = gen_from->bs; 5324 5325 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5326 if (scall == MAT_INITIAL_MATRIX){ 5327 /* i-array */ 5328 /*---------*/ 5329 /* post receives */ 5330 for (i=0; i<nrecvs; i++){ 5331 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5332 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5333 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5334 } 5335 5336 /* pack the outgoing message */ 5337 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5338 sstartsj[0] = 0; rstartsj[0] = 0; 5339 len = 0; /* total length of j or a array to be sent */ 5340 k = 0; 5341 for (i=0; i<nsends; i++){ 5342 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5343 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5344 for (j=0; j<nrows; j++) { 5345 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5346 for (l=0; l<sbs; l++){ 5347 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5348 rowlen[j*sbs+l] = ncols; 5349 len += ncols; 5350 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5351 } 5352 k++; 5353 } 5354 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5355 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5356 } 5357 /* 
recvs and sends of i-array are completed */ 5358 i = nrecvs; 5359 while (i--) { 5360 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5361 } 5362 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5363 5364 /* allocate buffers for sending j and a arrays */ 5365 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5366 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5367 5368 /* create i-array of B_oth */ 5369 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5370 b_othi[0] = 0; 5371 len = 0; /* total length of j or a array to be received */ 5372 k = 0; 5373 for (i=0; i<nrecvs; i++){ 5374 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5375 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5376 for (j=0; j<nrows; j++) { 5377 b_othi[k+1] = b_othi[k] + rowlen[j]; 5378 len += rowlen[j]; k++; 5379 } 5380 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5381 } 5382 5383 /* allocate space for j and a arrrays of B_oth */ 5384 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5385 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5386 5387 /* j-array */ 5388 /*---------*/ 5389 /* post receives of j-array */ 5390 for (i=0; i<nrecvs; i++){ 5391 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5392 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5393 } 5394 5395 /* pack the outgoing message j-array */ 5396 k = 0; 5397 for (i=0; i<nsends; i++){ 5398 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5399 bufJ = bufj+sstartsj[i]; 5400 for (j=0; j<nrows; j++) { 5401 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5402 for (ll=0; ll<sbs; ll++){ 5403 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5404 for (l=0; l<ncols; l++){ 5405 *bufJ++ = cols[l]; 5406 } 5407 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5408 } 5409 } 5410 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5411 } 5412 5413 /* recvs and sends of j-array are completed */ 5414 i = nrecvs; 5415 while (i--) { 5416 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5417 } 5418 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5419 } else if (scall == MAT_REUSE_MATRIX){ 5420 sstartsj = *startsj_s; 5421 rstartsj = *startsj_r; 5422 bufa = *bufa_ptr; 5423 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5424 b_otha = b_oth->a; 5425 } else { 5426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5427 } 5428 5429 /* a-array */ 5430 /*---------*/ 5431 /* post receives of a-array */ 5432 for (i=0; i<nrecvs; i++){ 5433 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5434 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5435 } 5436 5437 /* pack the outgoing message a-array */ 5438 k = 0; 5439 for (i=0; i<nsends; i++){ 5440 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5441 bufA = bufa+sstartsj[i]; 5442 for (j=0; j<nrows; j++) { 5443 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5444 for (ll=0; ll<sbs; ll++){ 5445 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5446 for (l=0; l<ncols; l++){ 5447 *bufA++ = vals[l]; 5448 } 5449 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5450 } 5451 } 5452 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5453 } 5454 /* recvs and sends of a-array are completed */ 5455 i = nrecvs; 5456 while (i--) { 5457 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5458 } 5459 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5460 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5461 5462 if (scall == MAT_INITIAL_MATRIX){ 5463 /* put together the new matrix */ 5464 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5465 5466 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5467 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5468 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5469 b_oth->free_a = PETSC_TRUE; 5470 b_oth->free_ij = PETSC_TRUE; 5471 b_oth->nonew = 0; 5472 5473 ierr = PetscFree(bufj);CHKERRQ(ierr); 5474 if (!startsj_s || !bufa_ptr){ 5475 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5476 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5477 } else { 5478 *startsj_s = sstartsj; 5479 *startsj_r = rstartsj; 5480 *bufa_ptr = bufa; 5481 } 5482 } 5483 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5484 PetscFunctionReturn(0); 5485 } 5486 5487 #undef __FUNCT__ 5488 #define __FUNCT__ "MatGetCommunicationStructs" 5489 /*@C 5490 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5491 5492 Not Collective 5493 5494 Input Parameters: 5495 . A - The matrix in mpiaij format 5496 5497 Output Parameter: 5498 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5499 . 
colmap - A map from global column index to local index into lvec 5500 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5501 5502 Level: developer 5503 5504 @*/ 5505 #if defined (PETSC_USE_CTABLE) 5506 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5507 #else 5508 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5509 #endif 5510 { 5511 Mat_MPIAIJ *a; 5512 5513 PetscFunctionBegin; 5514 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5515 PetscValidPointer(lvec, 2); 5516 PetscValidPointer(colmap, 3); 5517 PetscValidPointer(multScatter, 4); 5518 a = (Mat_MPIAIJ *) A->data; 5519 if (lvec) *lvec = a->lvec; 5520 if (colmap) *colmap = a->colmap; 5521 if (multScatter) *multScatter = a->Mvctx; 5522 PetscFunctionReturn(0); 5523 } 5524 5525 EXTERN_C_BEGIN 5526 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5527 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5528 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5529 EXTERN_C_END 5530 5531 #undef __FUNCT__ 5532 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5533 /* 5534 Computes (B'*A')' since computing B*A directly is untenable 5535 5536 n p p 5537 ( ) ( ) ( ) 5538 m ( A ) * n ( B ) = m ( C ) 5539 ( ) ( ) ( ) 5540 5541 */ 5542 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5543 { 5544 PetscErrorCode ierr; 5545 Mat At,Bt,Ct; 5546 5547 PetscFunctionBegin; 5548 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5549 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5550 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5551 ierr = MatDestroy(&At);CHKERRQ(ierr); 5552 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5553 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5554 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5555 PetscFunctionReturn(0); 5556 } 5557 5558 #undef __FUNCT__ 5559 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5560 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5561 { 5562 PetscErrorCode ierr; 5563 PetscInt m=A->rmap->n,n=B->cmap->n; 5564 Mat Cmat; 5565 5566 PetscFunctionBegin; 5567 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5568 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5569 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5570 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5571 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5572 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5573 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5574 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5575 5576 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5577 *C = Cmat; 5578 PetscFunctionReturn(0); 5579 } 5580 5581 /* ----------------------------------------------------------------*/ 5582 #undef __FUNCT__ 5583 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5584 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5585 { 5586 PetscErrorCode ierr; 5587 5588 PetscFunctionBegin; 5589 if (scall == MAT_INITIAL_MATRIX){ 5590 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5591 } 5592 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
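  /* the numeric routine above forms C = A*B as (B'*A')': both factors are transposed, the
     transposes are multiplied, and the result is transposed back into C; see
     MatMatMultNumeric_MPIDense_MPIAIJ() above */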
5593 PetscFunctionReturn(0); 5594 } 5595 5596 EXTERN_C_BEGIN 5597 #if defined(PETSC_HAVE_MUMPS) 5598 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5599 #endif 5600 #if defined(PETSC_HAVE_PASTIX) 5601 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5602 #endif 5603 #if defined(PETSC_HAVE_SUPERLU_DIST) 5604 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5605 #endif 5606 #if defined(PETSC_HAVE_CLIQUE) 5607 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5608 #endif 5609 EXTERN_C_END 5610 5611 /*MC 5612 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5613 5614 Options Database Keys: 5615 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5616 5617 Level: beginner 5618 5619 .seealso: MatCreateAIJ() 5620 M*/ 5621 5622 EXTERN_C_BEGIN 5623 #undef __FUNCT__ 5624 #define __FUNCT__ "MatCreate_MPIAIJ" 5625 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5626 { 5627 Mat_MPIAIJ *b; 5628 PetscErrorCode ierr; 5629 PetscMPIInt size; 5630 5631 PetscFunctionBegin; 5632 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5633 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5634 B->data = (void*)b; 5635 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5636 B->assembled = PETSC_FALSE; 5637 B->insertmode = NOT_SET_VALUES; 5638 b->size = size; 5639 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5640 5641 /* build cache for off array entries formed */ 5642 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5643 b->donotstash = PETSC_FALSE; 5644 b->colmap = 0; 5645 b->garray = 0; 5646 b->roworiented = PETSC_TRUE; 5647 5648 /* stuff used for matrix vector multiply */ 5649 b->lvec = PETSC_NULL; 5650 b->Mvctx = PETSC_NULL; 5651 5652 /* stuff for MatGetRow() */ 5653 b->rowindices = 0; 5654 b->rowvalues = 0; 5655 b->getrowactive = PETSC_FALSE; 5656 5657 /* flexible pointer used in CUSP/CUSPARSE classes */ 5658 b->spptr = PETSC_NULL; 5659 5660 #if defined(PETSC_HAVE_MUMPS) 5661 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5662 "MatGetFactor_aij_mumps", 5663 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5664 #endif 5665 #if defined(PETSC_HAVE_PASTIX) 5666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5667 "MatGetFactor_mpiaij_pastix", 5668 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5669 #endif 5670 #if defined(PETSC_HAVE_SUPERLU_DIST) 5671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5672 "MatGetFactor_mpiaij_superlu_dist", 5673 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5674 #endif 5675 #if defined(PETSC_HAVE_CLIQUE) 5676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5677 "MatGetFactor_aij_clique", 5678 MatGetFactor_aij_clique);CHKERRQ(ierr); 5679 #endif 5680 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5681 "MatStoreValues_MPIAIJ", 5682 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5684 "MatRetrieveValues_MPIAIJ", 5685 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5687 "MatGetDiagonalBlock_MPIAIJ", 5688 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5689 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5690 "MatIsTranspose_MPIAIJ", 5691 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5692 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5693 "MatMPIAIJSetPreallocation_MPIAIJ", 5694 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5696 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5697 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5699 "MatDiagonalScaleLocal_MPIAIJ", 5700 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5701 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5702 "MatConvert_MPIAIJ_MPIAIJPERM", 5703 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5704 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5705 "MatConvert_MPIAIJ_MPIAIJCRL", 5706 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5707 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5708 "MatConvert_MPIAIJ_MPISBAIJ", 5709 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5710 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5711 "MatMatMult_MPIDense_MPIAIJ", 5712 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5713 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5714 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5715 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5716 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5717 "MatMatMultNumeric_MPIDense_MPIAIJ", 5718 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5719 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5720 PetscFunctionReturn(0); 5721 } 5722 EXTERN_C_END 5723 5724 #undef __FUNCT__ 5725 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5726 /*@ 5727 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5728 and "off-diagonal" part of the matrix in CSR format. 5729 5730 Collective on MPI_Comm 5731 5732 Input Parameters: 5733 + comm - MPI communicator 5734 . m - number of local rows (Cannot be PETSC_DECIDE) 5735 . n - This value should be the same as the local size used in creating the 5736 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5737 calculated if N is given) For square matrices n is almost always m. 5738 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5739 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5740 . i - row indices for "diagonal" portion of matrix 5741 . j - column indices 5742 . a - matrix values 5743 . oi - row indices for "off-diagonal" portion of matrix 5744 . oj - column indices 5745 - oa - matrix values 5746 5747 Output Parameter: 5748 . mat - the matrix 5749 5750 Level: advanced 5751 5752 Notes: 5753 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5754 must free the arrays once the matrix has been destroyed and not before. 5755 5756 The i and j indices are 0 based 5757 5758 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5759 5760 This sets local rows and cannot be used to set off-processor values. 
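   For illustration only, a minimal sketch of the intended calling sequence; m, n and the arrays
   di, dj, da, oi, oj, oa below are hypothetical placeholders for caller-built 0-based CSR data as
   described above, not symbols defined in this file:

$      Mat A;
$      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
$                                            di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);

   where di, dj, da describe the "diagonal" block and oi, oj, oa the "off-diagonal" block; all six
   arrays must remain valid until the matrix has been destroyed and are then freed by the caller.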
5761 5762 You cannot later use MatSetValues() to change values in this matrix. 5763 5764 .keywords: matrix, aij, compressed row, sparse, parallel 5765 5766 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5767 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5768 @*/ 5769 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5770 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5771 { 5772 PetscErrorCode ierr; 5773 Mat_MPIAIJ *maij; 5774 5775 PetscFunctionBegin; 5776 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5777 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5778 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5779 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5780 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5781 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5782 maij = (Mat_MPIAIJ*) (*mat)->data; 5783 maij->donotstash = PETSC_TRUE; 5784 (*mat)->preallocated = PETSC_TRUE; 5785 5786 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5787 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5788 5789 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5790 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5791 5792 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5793 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5794 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5795 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5796 5797 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5798 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5799 PetscFunctionReturn(0); 5800 } 5801 5802 /* 5803 Special version for direct calls from Fortran 5804 */ 5805 #include <petsc-private/fortranimpl.h> 5806 5807 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5808 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5809 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5810 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5811 #endif 5812 5813 /* Change these macros so can be used in void function */ 5814 #undef CHKERRQ 5815 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5816 #undef SETERRQ2 5817 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5818 #undef SETERRQ3 5819 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5820 #undef SETERRQ 5821 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5822 5823 EXTERN_C_BEGIN 5824 #undef __FUNCT__ 5825 #define __FUNCT__ "matsetvaluesmpiaij_" 5826 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5827 { 5828 Mat mat = *mmat; 5829 PetscInt m = *mm, n = *mn; 5830 InsertMode addv = *maddv; 5831 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5832 PetscScalar value; 5833 PetscErrorCode ierr; 5834 5835 MatCheckPreallocated(mat,1); 5836 if (mat->insertmode == NOT_SET_VALUES) { 5837 mat->insertmode = addv; 5838 } 5839 #if defined(PETSC_USE_DEBUG) 5840 else if (mat->insertmode != addv) { 5841 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5842 } 5843 #endif 5844 { 5845 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5846 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5847 PetscBool roworiented = aij->roworiented; 5848 5849 /* Some Variables required in the macro */ 5850 Mat A = aij->A; 5851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5852 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5853 MatScalar *aa = a->a; 5854 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5855 Mat B = aij->B; 5856 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5857 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5858 MatScalar *ba = b->a; 5859 5860 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5861 PetscInt nonew = a->nonew; 5862 MatScalar *ap1,*ap2; 5863 5864 PetscFunctionBegin; 5865 for (i=0; i<m; i++) { 5866 if (im[i] < 0) continue; 5867 #if defined(PETSC_USE_DEBUG) 5868 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5869 #endif 5870 if (im[i] >= rstart && im[i] < rend) { 5871 row = im[i] - rstart; 5872 lastcol1 = -1; 5873 rp1 = aj + ai[row]; 5874 ap1 = aa + ai[row]; 5875 rmax1 = aimax[row]; 5876 nrow1 = ailen[row]; 5877 low1 = 0; 5878 high1 = nrow1; 5879 lastcol2 = -1; 5880 rp2 = bj + bi[row]; 5881 ap2 = ba + bi[row]; 5882 rmax2 = bimax[row]; 5883 nrow2 = bilen[row]; 5884 low2 = 0; 5885 high2 = nrow2; 5886 5887 for (j=0; j<n; j++) { 5888 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5889 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5890 if (in[j] >= cstart && in[j] < cend){ 5891 col = in[j] - cstart; 5892 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5893 } else if (in[j] < 0) continue; 5894 #if defined(PETSC_USE_DEBUG) 5895 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5896 #endif 5897 else { 5898 if (mat->was_assembled) { 5899 if (!aij->colmap) { 5900 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5901 } 5902 #if defined (PETSC_USE_CTABLE) 5903 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5904 col--; 5905 #else 5906 col = aij->colmap[in[j]] - 1; 5907 #endif 5908 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5909 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5910 col = in[j]; 5911 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5912 B = aij->B; 5913 b = (Mat_SeqAIJ*)B->data; 5914 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5915 rp2 = bj + bi[row]; 5916 ap2 = ba + bi[row]; 5917 rmax2 = bimax[row]; 5918 nrow2 = bilen[row]; 5919 low2 = 0; 5920 high2 = nrow2; 5921 bm = aij->B->rmap->n; 5922 ba = b->a; 5923 } 5924 } else col = in[j]; 5925 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5926 } 5927 } 5928 } else { 5929 if (!aij->donotstash) { 5930 if (roworiented) { 5931 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5932 } else { 5933 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5934 } 5935 } 5936 } 5937 }} 5938 PetscFunctionReturnVoid(); 5939 } 5940 EXTERN_C_END 5941 5942
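/*
   Note on matsetvaluesmpiaij_(): this is the special direct-call Fortran version of MatSetValues()
   for MPIAIJ matrices mentioned above. Because CHKERRQ/SETERRQ were redefined to CHKERRABORT
   variants before its definition (so they can be used in a void function), any error detected in
   this routine aborts the program rather than returning an error code to the caller.
*/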