#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL.
   The type also automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
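/*
  Usage sketch (illustrative only, not part of this file): creating an AIJ
  matrix that works for both one and many processes by calling both
  preallocation routines, as the manual pages above recommend. The local sizes
  and per-row nonzero estimates below are made-up values.

    Mat      A;
    PetscInt m = 10,n = 10;
    ierr = MatCreate(comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

  The preallocation call that does not match the actual (Seq or MPI) type is
  simply ignored, which is why calling both is safe.
*/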
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
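/*
  Usage sketch for the routine above (illustrative, not part of this file):
  MatGetColumnNorms() fills a caller-provided array whose length is the global
  number of columns, and every process receives the full result.

    PetscReal *norms;
    PetscInt  N;
    ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
    ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
    ierr = PetscFree(norms);CHKERRQ(ierr);
*/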
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
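/*
  A note on the routine above: for MAT_INITIAL_MATRIX, rank 0 ships each remote
  block of the sequential matrix in three messages (row lengths, column
  indices, numerical values), and every rank then derives its diagonal and
  off-diagonal preallocation by counting, per row, how many column indices fall
  outside its ownership range [rstart,rend). The ld array (entries to the left
  of the diagonal block in each row) is kept on the matrix so that a later
  MAT_REUSE_MATRIX call can split the incoming value stream between the A and B
  blocks without resending any indices.
*/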
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
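/*
  The two macros above perform the same insertion for the diagonal (A) and
  off-diagonal (B) blocks: a truncated binary search narrows the window of the
  sorted row to at most five entries, a linear scan then looks for the column,
  and if it is absent (and new nonzeros are allowed) the later entries of the
  row are shifted up one slot to make room. In outline (illustrative only):

    while (high - low > 5) {
      t = (low + high)/2;
      if (rp[t] > col) high = t;
      else             low  = t;
    }
*/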
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
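/*
  Off-process entries stashed by MatSetValues_MPIAIJ() above are communicated
  only during assembly, so the usual calling sequence around the routines below
  is (a sketch, with illustrative arguments):

    ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/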
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
  /*
     determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble
  */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
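  /*
     Note: the block above implements the usual Dirichlet boundary-condition
     idiom; when x and b are passed in, each zeroed row i gets b_i = diag*x_i,
     so a caller can keep the system consistent with prescribed solution
     values, e.g. (illustrative):

       ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
  */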
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with many mallocs and slow things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
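/*
  MatZeroRowsColumns() below also zeroes the matching columns, which preserves
  symmetry; when x and b are supplied, the eliminated column entries are folded
  into the right hand side. A call mirrors MatZeroRows() (illustrative):

    ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/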
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
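/*
  MatMult_MPIAIJ() and MatMultAdd_MPIAIJ() above overlap communication with
  computation: the scatter of the needed ghost values of xx into lvec is
  started, the diagonal block A is applied while those messages are in flight,
  and only then is the scatter completed and the off-diagonal block B applied
  to lvec. MatMultTranspose_MPIAIJ() below uses the same idea with the scatter
  run in reverse.
*/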
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we still need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
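/*
  Usage sketch for MatIsTranspose() defined above (illustrative values): the
  tolerance bounds how much corresponding entries a_ij and b_ji may differ.

    PetscBool flg;
    ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
*/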
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);
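  /*
     The same flow-control pattern now repeats for the column indices and then
     for the numerical values: rank 0 writes its own data, then receives and
     writes each other rank's data in turn, while the PetscViewerFlowControl
     calls limit how many ranks are sending at once.
  */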
  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest nz on any processor */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;
  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
*/ 1509 Mat A; 1510 Mat_SeqAIJ *Aloc; 1511 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1512 MatScalar *a; 1513 1514 if (mat->rmap->N > 1024) { 1515 PetscBool flg = PETSC_FALSE; 1516 1517 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1518 if (!flg) { 1519 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1520 } 1521 } 1522 1523 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1528 } 1529 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1530 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1531 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1532 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1534 1535 /* copy over the A part */ 1536 Aloc = (Mat_SeqAIJ*)aij->A->data; 1537 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1543 } 1544 aj = Aloc->j; 1545 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1546 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqAIJ*)aij->B->data; 1549 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 row = mat->rmap->rstart; 1551 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1552 ct = cols; 1553 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1554 for (i=0; i<m; i++) { 1555 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1556 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1557 } 1558 ierr = PetscFree(ct);CHKERRQ(ierr); 1559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1561 /* 1562 Everyone has to call to draw the matrix since the graphics waits are 1563 synchronized across all processors that share the PetscDraw object 1564 */ 1565 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1568 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1569 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1570 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1571 } 1572 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 ierr = MatDestroy(&A);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatView_MPIAIJ" 1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1581 { 1582 PetscErrorCode ierr; 1583 PetscBool iascii,isdraw,issocket,isbinary; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1587 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1590 if (iascii || isdraw || isbinary || issocket) { 1591 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1610 } 1611 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 MPI_Comm comm; 1703 PetscInt first,local_rowsize,local_colsize; 1704 const PetscInt *rows; 1705 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1710 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1711 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1712 /* collect the global row permutation and invert it */ 1713 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1714 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1715 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1718 /* get the local target indices */ 1719 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1720 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1721 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1723 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1724 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1725 /* the column permutation is so much easier; 1726 make a local version of 'colp' and invert it */ 1727 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1728 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1729 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1730 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1731 /* now we just get the submatrix */ 1732 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1733 /* clean up */ 1734 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1735 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = 
mat->B; 1745 PetscErrorCode ierr; 1746 PetscReal isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1755 isend[3] += info->memory; isend[4] += info->mallocs; 1756 if (flag == MAT_LOCAL) { 1757 info->nz_used = isend[0]; 1758 info->nz_allocated = isend[1]; 1759 info->nz_unneeded = isend[2]; 1760 info->memory = isend[3]; 1761 info->mallocs = isend[4]; 1762 } else if (flag == MAT_GLOBAL_MAX) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } else if (flag == MAT_GLOBAL_SUM) { 1770 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } 1777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1778 info->fill_ratio_needed = 0; 1779 info->factor_mallocs = 0; 1780 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetOption_MPIAIJ" 1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1787 { 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 switch (op) { 1793 case MAT_NEW_NONZERO_LOCATIONS: 1794 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1795 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1796 case MAT_KEEP_NONZERO_PATTERN: 1797 case MAT_NEW_NONZERO_LOCATION_ERR: 1798 case MAT_USE_INODES: 1799 case MAT_IGNORE_ZERO_ENTRIES: 1800 MatCheckPreallocated(A,1); 1801 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1802 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1803 break; 1804 case MAT_ROW_ORIENTED: 1805 a->roworiented = flg; 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_NEW_DIAGONALS: 1810 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1811 break; 1812 case MAT_IGNORE_OFF_PROC_ENTRIES: 1813 a->donotstash = flg; 1814 break; 1815 case MAT_SPD: 1816 A->spd_set = PETSC_TRUE; 1817 A->spd = flg; 1818 if (flg) { 1819 A->symmetric = PETSC_TRUE; 1820 A->structurally_symmetric = PETSC_TRUE; 1821 A->symmetric_set = PETSC_TRUE; 1822 A->structurally_symmetric_set = PETSC_TRUE; 1823 } 1824 break; 1825 case MAT_SYMMETRIC: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 case MAT_STRUCTURALLY_SYMMETRIC: 1829 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1830 break; 1831 case MAT_HERMITIAN: 1832 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1833 break; 1834 case MAT_SYMMETRY_ETERNAL: 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 #if defined(PETSC_HAVE_TXPETSCGPU) 1838 // Sorry, I don't know how else to add MatOptions for data specific 1839 // to GPU classes without protecting against it in this case statement. 1840 // If anyone knows of a better way, let me know. 
Paul 1841 case MAT_DIAGBLOCK_CSR: 1842 case MAT_OFFDIAGBLOCK_CSR: 1843 case MAT_CSR: 1844 case MAT_DIAGBLOCK_DIA: 1845 case MAT_OFFDIAGBLOCK_DIA: 1846 case MAT_DIA: 1847 case MAT_DIAGBLOCK_ELL: 1848 case MAT_OFFDIAGBLOCK_ELL: 1849 case MAT_ELL: 1850 case MAT_DIAGBLOCK_HYB: 1851 case MAT_OFFDIAGBLOCK_HYB: 1852 case MAT_HYB: 1853 /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */ 1854 break; 1855 #endif 1856 default: 1857 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1858 } 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatGetRow_MPIAIJ" 1864 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1865 { 1866 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1867 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1868 PetscErrorCode ierr; 1869 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1870 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1871 PetscInt *cmap,*idx_p; 1872 1873 PetscFunctionBegin; 1874 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1875 mat->getrowactive = PETSC_TRUE; 1876 1877 if (!mat->rowvalues && (idx || v)) { 1878 /* 1879 allocate enough space to hold information from the longest row. 1880 */ 1881 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1882 PetscInt max = 1,tmp; 1883 for (i=0; i<matin->rmap->n; i++) { 1884 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1885 if (max < tmp) { max = tmp; } 1886 } 1887 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1888 } 1889 1890 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1891 lrow = row - rstart; 1892 1893 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1894 if (!v) {pvA = 0; pvB = 0;} 1895 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1896 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1897 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1898 nztot = nzA + nzB; 1899 1900 cmap = mat->garray; 1901 if (v || idx) { 1902 if (nztot) { 1903 /* Sort by increasing column numbers, assuming A and B already sorted */ 1904 PetscInt imark = -1; 1905 if (v) { 1906 *v = v_p = mat->rowvalues; 1907 for (i=0; i<nzB; i++) { 1908 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1909 else break; 1910 } 1911 imark = i; 1912 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1913 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1914 } 1915 if (idx) { 1916 *idx = idx_p = mat->rowindices; 1917 if (imark > -1) { 1918 for (i=0; i<imark; i++) { 1919 idx_p[i] = cmap[cworkB[i]]; 1920 } 1921 } else { 1922 for (i=0; i<nzB; i++) { 1923 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1924 else break; 1925 } 1926 imark = i; 1927 } 1928 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1929 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1930 } 1931 } else { 1932 if (idx) *idx = 0; 1933 if (v) *v = 0; 1934 } 1935 } 1936 *nz = nztot; 1937 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1938 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1944 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1945 { 1946 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 1947 1948 PetscFunctionBegin; 1949 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1950 aij->getrowactive = PETSC_FALSE; 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatNorm_MPIAIJ" 1956 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1957 { 1958 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1959 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1960 PetscErrorCode ierr; 1961 PetscInt i,j,cstart = mat->cmap->rstart; 1962 PetscReal sum = 0.0; 1963 MatScalar *v; 1964 1965 PetscFunctionBegin; 1966 if (aij->size == 1) { 1967 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1968 } else { 1969 if (type == NORM_FROBENIUS) { 1970 v = amat->a; 1971 for (i=0; i<amat->nz; i++) { 1972 #if defined(PETSC_USE_COMPLEX) 1973 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1974 #else 1975 sum += (*v)*(*v); v++; 1976 #endif 1977 } 1978 v = bmat->a; 1979 for (i=0; i<bmat->nz; i++) { 1980 #if defined(PETSC_USE_COMPLEX) 1981 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1982 #else 1983 sum += (*v)*(*v); v++; 1984 #endif 1985 } 1986 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1987 *norm = PetscSqrtReal(*norm); 1988 } else if (type == NORM_1) { /* max column norm */ 1989 PetscReal *tmp,*tmp2; 1990 PetscInt *jj,*garray = aij->garray; 1991 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1992 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1993 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1994 *norm = 0.0; 1995 v = amat->a; jj = amat->j; 1996 for (j=0; j<amat->nz; j++) { 1997 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1998 } 1999 v = bmat->a; jj = bmat->j; 2000 for (j=0; j<bmat->nz; j++) { 2001 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2002 } 2003 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2004 for (j=0; j<mat->cmap->N; j++) { 2005 if (tmp2[j] > *norm) *norm = tmp2[j]; 2006 } 2007 ierr = PetscFree(tmp);CHKERRQ(ierr); 2008 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2009 } else if (type == NORM_INFINITY) { /* max row norm */ 2010 PetscReal ntemp = 0.0; 2011 for (j=0; j<aij->A->rmap->n; j++) { 2012 v = amat->a + amat->i[j]; 2013 sum = 0.0; 2014 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2015 sum += PetscAbsScalar(*v); v++; 2016 } 2017 v = bmat->a + bmat->i[j]; 2018 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2019 sum += PetscAbsScalar(*v); v++; 2020 } 2021 if (sum > ntemp) ntemp = sum; 2022 } 2023 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2024 } else { 2025 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2026 } 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatTranspose_MPIAIJ" 2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2034 { 2035 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2036 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2037 PetscErrorCode ierr; 2038 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2039 PetscInt cstart=A->cmap->rstart,ncol; 2040 Mat B; 2041 MatScalar *array; 2042 2043 PetscFunctionBegin; 2044 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square 
matrix only for in-place"); 2045 2046 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2047 ai = Aloc->i; aj = Aloc->j; 2048 bi = Bloc->i; bj = Bloc->j; 2049 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2050 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2051 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2052 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2053 for (i=0; i<ai[ma]; i++){ 2054 d_nnz[aj[i]] ++; 2055 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2056 } 2057 2058 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2059 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2060 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2061 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2062 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2063 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2064 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2065 } else { 2066 B = *matout; 2067 } 2068 2069 /* copy over the A part */ 2070 array = Aloc->a; 2071 row = A->rmap->rstart; 2072 for (i=0; i<ma; i++) { 2073 ncol = ai[i+1]-ai[i]; 2074 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2075 row++; array += ncol; aj += ncol; 2076 } 2077 aj = Aloc->j; 2078 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2079 2080 /* copy over the B part */ 2081 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2082 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2083 array = Bloc->a; 2084 row = A->rmap->rstart; 2085 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2086 cols_tmp = cols; 2087 for (i=0; i<mb; i++) { 2088 ncol = bi[i+1]-bi[i]; 2089 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2090 row++; array += ncol; cols_tmp += ncol; 2091 } 2092 ierr = PetscFree(cols);CHKERRQ(ierr); 2093 2094 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2097 *matout = B; 2098 } else { 2099 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2100 } 2101 PetscFunctionReturn(0); 2102 } 2103 2104 #undef __FUNCT__ 2105 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2106 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2107 { 2108 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2109 Mat a = aij->A,b = aij->B; 2110 PetscErrorCode ierr; 2111 PetscInt s1,s2,s3; 2112 2113 PetscFunctionBegin; 2114 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2115 if (rr) { 2116 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2117 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2118 /* Overlap communication with computation. 
*/ 2119 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 } 2121 if (ll) { 2122 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2123 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2124 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2125 } 2126 /* scale the diagonal block */ 2127 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2128 2129 if (rr) { 2130 /* Do a scatter end and then right scale the off-diagonal block */ 2131 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2133 } 2134 2135 PetscFunctionReturn(0); 2136 } 2137 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2140 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatEqual_MPIAIJ" 2152 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2153 { 2154 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2155 Mat a,b,c,d; 2156 PetscBool flg; 2157 PetscErrorCode ierr; 2158 2159 PetscFunctionBegin; 2160 a = matA->A; b = matA->B; 2161 c = matB->A; d = matB->B; 2162 2163 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2164 if (flg) { 2165 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2166 } 2167 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatCopy_MPIAIJ" 2173 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2174 { 2175 PetscErrorCode ierr; 2176 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2177 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2178 2179 PetscFunctionBegin; 2180 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2181 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2182 /* because of the column compression in the off-processor part of the matrix a->B, 2183 the number of columns in a->B and b->B may be different, hence we cannot call 2184 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2185 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2186 then copying the submatrices */ 2187 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2188 } else { 2189 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2190 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatSetUp_MPIAIJ" 2197 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2198 { 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2210 { 2211 PetscInt i,m=Y->rmap->N; 2212 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2213 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2214 const PetscInt *xi = x->i,*yi = y->i; 2215 2216 PetscFunctionBegin; 2217 /* Set the number of nonzeros in the new matrix */ 2218 for(i=0; i<m; i++) { 2219 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2220 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2221 nnz[i] = 0; 2222 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2223 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2224 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2225 nnz[i]++; 2226 } 2227 for (; k<nzy; k++) nnz[i]++; 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatAXPY_MPIAIJ" 2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 PetscInt i; 2238 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2239 PetscBLASInt bnz,one=1; 2240 Mat_SeqAIJ *x,*y; 2241 2242 PetscFunctionBegin; 2243 if (str == SAME_NONZERO_PATTERN) { 2244 PetscScalar alpha = a; 2245 x = (Mat_SeqAIJ *)xx->A->data; 2246 y = (Mat_SeqAIJ *)yy->A->data; 2247 bnz = PetscBLASIntCast(x->nz); 2248 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2249 x = (Mat_SeqAIJ *)xx->B->data; 2250 y = (Mat_SeqAIJ *)yy->B->data; 2251 bnz = PetscBLASIntCast(x->nz); 2252 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2253 } else if (str == SUBSET_NONZERO_PATTERN) { 2254 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2255 2256 x = (Mat_SeqAIJ *)xx->B->data; 2257 y = (Mat_SeqAIJ *)yy->B->data; 2258 if (y->xtoy && y->XtoY != xx->B) { 2259 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2260 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2261 } 2262 if (!y->xtoy) { /* get xtoy */ 2263 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2264 y->XtoY = xx->B; 2265 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2266 } 2267 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2268 } else { 2269 Mat B; 2270 PetscInt *nnz_d,*nnz_o; 2271 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2272 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2273 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2274 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2275 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2276 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2277 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2278 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2279 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2280 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2281 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2282 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2283 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2284 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2285 } 2286 PetscFunctionReturn(0); 2287 } 2288 2289 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatConjugate_MPIAIJ" 2293 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2294 { 2295 #if defined(PETSC_USE_COMPLEX) 2296 PetscErrorCode ierr; 2297 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2298 2299 PetscFunctionBegin; 2300 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2301 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2302 #else 2303 PetscFunctionBegin; 2304 #endif 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #undef __FUNCT__ 2309 #define __FUNCT__ "MatRealPart_MPIAIJ" 2310 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2311 { 2312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2313 PetscErrorCode ierr; 2314 2315 PetscFunctionBegin; 2316 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2317 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2323 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2324 { 2325 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2326 PetscErrorCode ierr; 2327 2328 PetscFunctionBegin; 2329 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2330 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #ifdef PETSC_HAVE_PBGL 2335 2336 #include <boost/parallel/mpi/bsp_process_group.hpp> 2337 #include <boost/graph/distributed/ilu_default_graph.hpp> 2338 #include <boost/graph/distributed/ilu_0_block.hpp> 2339 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2340 #include <boost/graph/distributed/petsc/interface.hpp> 2341 #include <boost/multi_array.hpp> 2342 #include <boost/parallel/distributed_property_map.hpp> 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2346 /* 2347 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2348 */ 2349 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2350 { 2351 namespace petsc = boost::distributed::petsc; 2352 2353 namespace graph_dist = boost::graph::distributed; 2354 using boost::graph::distributed::ilu_default::process_group_type; 2355 using boost::graph::ilu_permuted; 2356 2357 PetscBool row_identity, col_identity; 2358 PetscContainer c; 2359 PetscInt m, n, M, N; 2360 PetscErrorCode ierr; 2361 2362 PetscFunctionBegin; 2363 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2364 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2365 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2366 if (!row_identity || !col_identity) { 2367 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2368 } 2369 2370 process_group_type pg; 2371 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2372 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2373 lgraph_type& level_graph = *lgraph_p; 2374 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2375 2376 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2377 ilu_permuted(level_graph); 2378 2379 /* put together the new matrix */ 2380 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2381 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2382 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2383 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2384 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2385 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2386 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 2389 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2390 ierr = PetscContainerSetPointer(c, lgraph_p); 2391 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2392 ierr = PetscContainerDestroy(&c); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2398 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2399 { 2400 PetscFunctionBegin; 2401 PetscFunctionReturn(0); 2402 } 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatSolve_MPIAIJ" 2406 /* 2407 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2408 */ 2409 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2410 { 2411 namespace graph_dist = boost::graph::distributed; 2412 2413 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2414 lgraph_type* lgraph_p; 2415 PetscContainer c; 2416 PetscErrorCode ierr; 2417 2418 PetscFunctionBegin; 2419 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2420 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2421 ierr = VecCopy(b, x);CHKERRQ(ierr); 2422 2423 PetscScalar* array_x; 2424 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2425 PetscInt sx; 2426 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2427 2428 PetscScalar* array_b; 2429 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2430 PetscInt sb; 2431 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2432 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2437 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2438 ref_x(array_x, boost::extents[num_vertices(graph)]); 2439 2440 typedef boost::iterator_property_map<array_ref_type::iterator, 2441 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2442 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2443 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2444 2445 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2446 2447 PetscFunctionReturn(0); 2448 } 2449 #endif 2450 2451 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2452 PetscInt nzlocal,nsends,nrecvs; 2453 PetscMPIInt *send_rank,*recv_rank; 2454 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2455 PetscScalar *sbuf_a,**rbuf_a; 2456 PetscErrorCode (*Destroy)(Mat); 2457 } Mat_Redundant; 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2461 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2462 { 2463 PetscErrorCode ierr; 2464 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2465 PetscInt i; 2466 2467 PetscFunctionBegin; 2468 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2469 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2470 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2471 for (i=0; i<redund->nrecvs; i++){ 2472 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2473 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2474 } 2475 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2476 ierr = PetscFree(redund);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatDestroy_MatRedundant" 2482 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2483 { 2484 PetscErrorCode ierr; 2485 PetscContainer container; 2486 Mat_Redundant *redund=PETSC_NULL; 2487 2488 PetscFunctionBegin; 2489 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2490 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2491 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2492 A->ops->destroy = redund->Destroy; 2493 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2494 if (A->ops->destroy) { 2495 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2496 } 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2502 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2503 { 2504 PetscMPIInt rank,size; 2505 MPI_Comm comm=((PetscObject)mat)->comm; 2506 PetscErrorCode ierr; 2507 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2508 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2509 PetscInt *rowrange=mat->rmap->range; 2510 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2511 Mat A=aij->A,B=aij->B,C=*matredundant; 2512 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2513 PetscScalar *sbuf_a; 2514 PetscInt nzlocal=a->nz+b->nz; 2515 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2516 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2517 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2518 MatScalar *aworkA,*aworkB; 2519 PetscScalar *vals; 2520 PetscMPIInt tag1,tag2,tag3,imdex; 2521 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2522 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2523 MPI_Status recv_status,*send_status; 2524 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2525 PetscInt **rbuf_j=PETSC_NULL; 2526 PetscScalar **rbuf_a=PETSC_NULL; 2527 Mat_Redundant *redund=PETSC_NULL; 2528 PetscContainer container; 2529 2530 PetscFunctionBegin; 2531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2532 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2533 2534 if (reuse == MAT_REUSE_MATRIX) { 2535 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2536 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2537 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2538 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2539 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2540 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2541 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2542 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2543 2544 nsends = redund->nsends; 2545 nrecvs = redund->nrecvs; 2546 send_rank = redund->send_rank; 2547 recv_rank = redund->recv_rank; 2548 sbuf_nz = redund->sbuf_nz; 2549 rbuf_nz = redund->rbuf_nz; 2550 sbuf_j = redund->sbuf_j; 2551 sbuf_a = redund->sbuf_a; 2552 rbuf_j = redund->rbuf_j; 2553 rbuf_a = redund->rbuf_a; 2554 } 2555 2556 if (reuse == MAT_INITIAL_MATRIX){ 2557 PetscMPIInt subrank,subsize; 2558 PetscInt nleftover,np_subcomm; 2559 /* get the destination processors' id send_rank, nsends and nrecvs */ 2560 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2561 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2562 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2563 np_subcomm = size/nsubcomm; 2564 nleftover = size - nsubcomm*np_subcomm; 2565 nsends = 0; nrecvs = 0; 2566 for (i=0; i<size; i++){ /* i=rank*/ 2567 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2568 send_rank[nsends] = i; nsends++; 2569 recv_rank[nrecvs++] = i; 2570 } 2571 } 2572 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2573 i = size-nleftover-1; 2574 j = 0; 2575 while (j < nsubcomm - nleftover){ 2576 send_rank[nsends++] = i; 2577 i--; j++; 2578 } 2579 } 2580 2581 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2582 for (i=0; i<nleftover; i++){ 2583 recv_rank[nrecvs++] = size-nleftover+i; 2584 } 2585 } 2586 2587 /* allocate sbuf_j, sbuf_a */ 2588 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2589 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2590 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2591 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2592 2593 /* copy mat's local entries into the buffers */ 2594 if (reuse == MAT_INITIAL_MATRIX){ 2595 rownz_max = 0; 2596 rptr = sbuf_j; 2597 cols = sbuf_j + rend-rstart + 1; 2598 vals = sbuf_a; 2599 rptr[0] = 0; 2600 for (i=0; i<rend-rstart; i++){ 2601 row = i + rstart; 2602 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2603 ncols = nzA + nzB; 2604 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2605 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2606 /* load the column indices for this row into cols */ 2607 lwrite = 0; 2608 for (l=0; l<nzB; l++) { 2609 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2610 vals[lwrite] = aworkB[l]; 2611 cols[lwrite++] = ctmp; 2612 } 2613 } 2614 for (l=0; l<nzA; l++){ 2615 vals[lwrite] = aworkA[l]; 2616 cols[lwrite++] = cstart + cworkA[l]; 2617 } 2618 for (l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2620 vals[lwrite] = aworkB[l]; 2621 cols[lwrite++] = ctmp; 2622 } 2623 } 2624 vals += ncols; 2625 cols += ncols; 2626 rptr[i+1] = rptr[i] + ncols; 2627 if (rownz_max < ncols) rownz_max = ncols; 2628 } 2629 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2630 } else { /* only copy matrix values into sbuf_a */ 2631 rptr = sbuf_j; 2632 vals = sbuf_a; 2633 rptr[0] = 0; 2634 for (i=0; i<rend-rstart; i++){ 2635 row = i + 
rstart; 2636 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2637 ncols = nzA + nzB; 2638 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2639 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2640 lwrite = 0; 2641 for (l=0; l<nzB; l++) { 2642 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2643 } 2644 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2645 for (l=0; l<nzB; l++) { 2646 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2647 } 2648 vals += ncols; 2649 rptr[i+1] = rptr[i] + ncols; 2650 } 2651 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2652 2653 /* send nzlocal to others, and recv other's nzlocal */ 2654 /*--------------------------------------------------*/ 2655 if (reuse == MAT_INITIAL_MATRIX){ 2656 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2657 s_waits2 = s_waits3 + nsends; 2658 s_waits1 = s_waits2 + nsends; 2659 r_waits1 = s_waits1 + nsends; 2660 r_waits2 = r_waits1 + nrecvs; 2661 r_waits3 = r_waits2 + nrecvs; 2662 } else { 2663 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2664 r_waits3 = s_waits3 + nsends; 2665 } 2666 2667 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2668 if (reuse == MAT_INITIAL_MATRIX){ 2669 /* get new tags to keep the communication clean */ 2670 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2671 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2672 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2673 2674 /* post receives of other's nzlocal */ 2675 for (i=0; i<nrecvs; i++){ 2676 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2677 } 2678 /* send nzlocal to others */ 2679 for (i=0; i<nsends; i++){ 2680 sbuf_nz[i] = nzlocal; 2681 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2682 } 2683 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2684 count = nrecvs; 2685 while (count) { 2686 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2687 recv_rank[imdex] = recv_status.MPI_SOURCE; 2688 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2689 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2690 2691 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2692 rbuf_nz[imdex] += i + 2; 2693 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2694 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2695 count--; 2696 } 2697 /* wait on sends of nzlocal */ 2698 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2699 /* send mat->i,j to others, and recv from other's */ 2700 /*------------------------------------------------*/ 2701 for (i=0; i<nsends; i++){ 2702 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2703 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2704 } 2705 /* wait on receives of mat->i,j */ 2706 /*------------------------------*/ 2707 count = nrecvs; 2708 while (count) { 2709 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2710 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2711 count--; 2712 } 2713 /* wait on sends of mat->i,j */ 2714 /*---------------------------*/ 2715 if (nsends) { 2716 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2717 } 2718 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2719 2720 /* post receives, send and receive mat->a */ 2721 /*----------------------------------------*/ 2722 for (imdex=0; imdex<nrecvs; imdex++) { 2723 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2724 } 2725 for (i=0; i<nsends; i++){ 2726 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2727 } 2728 count = nrecvs; 2729 while (count) { 2730 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2731 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2732 count--; 2733 } 2734 if (nsends) { 2735 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2736 } 2737 2738 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2739 2740 /* create redundant matrix */ 2741 /*-------------------------*/ 2742 if (reuse == MAT_INITIAL_MATRIX){ 2743 /* compute rownz_max for preallocation */ 2744 for (imdex=0; imdex<nrecvs; imdex++){ 2745 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2746 rptr = rbuf_j[imdex]; 2747 for (i=0; i<j; i++){ 2748 ncols = rptr[i+1] - rptr[i]; 2749 if (rownz_max < ncols) rownz_max = ncols; 2750 } 2751 } 2752 2753 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2754 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2755 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2756 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2757 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2758 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2759 } else { 2760 C = *matredundant; 2761 } 2762 2763 /* insert local matrix entries */ 2764 rptr = sbuf_j; 2765 cols = sbuf_j + rend-rstart + 1; 2766 vals = sbuf_a; 2767 for (i=0; i<rend-rstart; i++){ 2768 row = i + rstart; 2769 ncols = rptr[i+1] - rptr[i]; 2770 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2771 vals += ncols; 2772 cols += ncols; 2773 } 2774 /* insert received matrix entries */ 2775 for (imdex=0; imdex<nrecvs; imdex++){ 2776 rstart = rowrange[recv_rank[imdex]]; 2777 rend = rowrange[recv_rank[imdex]+1]; 2778 rptr = rbuf_j[imdex]; 2779 cols = rbuf_j[imdex] + rend-rstart + 1; 2780 vals = rbuf_a[imdex]; 2781 for (i=0; i<rend-rstart; i++){ 2782 row = i + rstart; 2783 ncols = rptr[i+1] - rptr[i]; 2784 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2785 vals += ncols; 2786 cols += ncols; 2787 } 2788 } 2789 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2792 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2793 if (reuse == MAT_INITIAL_MATRIX) { 2794 PetscContainer container; 2795 *matredundant = C; 2796 /* create a supporting struct and attach it to C for reuse */ 2797 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2798 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
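    /*
       Added note (not part of the original source): the Mat_Redundant struct filled in
       below keeps the send/receive ranks and the sbuf/rbuf communication buffers alive by
       composing them with C inside this PetscContainer.  A later call with MAT_REUSE_MATRIX
       finds them again via PetscObjectQuery()/PetscContainerGetPointer() at the top of this
       routine and then only re-sends the numerical values, skipping the index exchange.
       Roughly, the expected calling pattern is:

           ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
           ...  later, after mat's values (but not its nonzero pattern) have changed  ...
           ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
    */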
2799 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2800 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2801 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2802 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2803 2804 redund->nzlocal = nzlocal; 2805 redund->nsends = nsends; 2806 redund->nrecvs = nrecvs; 2807 redund->send_rank = send_rank; 2808 redund->recv_rank = recv_rank; 2809 redund->sbuf_nz = sbuf_nz; 2810 redund->rbuf_nz = rbuf_nz; 2811 redund->sbuf_j = sbuf_j; 2812 redund->sbuf_a = sbuf_a; 2813 redund->rbuf_j = rbuf_j; 2814 redund->rbuf_a = rbuf_a; 2815 2816 redund->Destroy = C->ops->destroy; 2817 C->ops->destroy = MatDestroy_MatRedundant; 2818 } 2819 PetscFunctionReturn(0); 2820 } 2821 2822 #undef __FUNCT__ 2823 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2824 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2825 { 2826 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2827 PetscErrorCode ierr; 2828 PetscInt i,*idxb = 0; 2829 PetscScalar *va,*vb; 2830 Vec vtmp; 2831 2832 PetscFunctionBegin; 2833 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2834 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2835 if (idx) { 2836 for (i=0; i<A->rmap->n; i++) { 2837 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2838 } 2839 } 2840 2841 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2842 if (idx) { 2843 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2844 } 2845 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2846 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2847 2848 for (i=0; i<A->rmap->n; i++){ 2849 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2850 va[i] = vb[i]; 2851 if (idx) idx[i] = a->garray[idxb[i]]; 2852 } 2853 } 2854 2855 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2856 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2857 ierr = PetscFree(idxb);CHKERRQ(ierr); 2858 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 #undef __FUNCT__ 2863 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2864 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2865 { 2866 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2867 PetscErrorCode ierr; 2868 PetscInt i,*idxb = 0; 2869 PetscScalar *va,*vb; 2870 Vec vtmp; 2871 2872 PetscFunctionBegin; 2873 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2874 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2875 if (idx) { 2876 for (i=0; i<A->cmap->n; i++) { 2877 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2878 } 2879 } 2880 2881 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2882 if (idx) { 2883 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2884 } 2885 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2886 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2887 2888 for (i=0; i<A->rmap->n; i++){ 2889 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2890 va[i] = vb[i]; 2891 if (idx) idx[i] = a->garray[idxb[i]]; 2892 } 2893 } 2894 2895 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2896 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2897 ierr = PetscFree(idxb);CHKERRQ(ierr); 2898 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2899 PetscFunctionReturn(0); 2900 } 2901 2902 #undef __FUNCT__ 2903 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2904 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2905 { 2906 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2907 PetscInt n = A->rmap->n; 2908 PetscInt cstart = 
A->cmap->rstart; 2909 PetscInt *cmap = mat->garray; 2910 PetscInt *diagIdx, *offdiagIdx; 2911 Vec diagV, offdiagV; 2912 PetscScalar *a, *diagA, *offdiagA; 2913 PetscInt r; 2914 PetscErrorCode ierr; 2915 2916 PetscFunctionBegin; 2917 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2918 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2919 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2920 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2921 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2922 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2923 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2924 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2925 for(r = 0; r < n; ++r) { 2926 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2927 a[r] = diagA[r]; 2928 idx[r] = cstart + diagIdx[r]; 2929 } else { 2930 a[r] = offdiagA[r]; 2931 idx[r] = cmap[offdiagIdx[r]]; 2932 } 2933 } 2934 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2935 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2936 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2937 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2938 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2939 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2940 PetscFunctionReturn(0); 2941 } 2942 2943 #undef __FUNCT__ 2944 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2945 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2946 { 2947 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2948 PetscInt n = A->rmap->n; 2949 PetscInt cstart = A->cmap->rstart; 2950 PetscInt *cmap = mat->garray; 2951 PetscInt *diagIdx, *offdiagIdx; 2952 Vec diagV, offdiagV; 2953 PetscScalar *a, *diagA, *offdiagA; 2954 PetscInt r; 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2959 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2960 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2961 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2962 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2963 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2964 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2965 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2966 for(r = 0; r < n; ++r) { 2967 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2968 a[r] = diagA[r]; 2969 idx[r] = cstart + diagIdx[r]; 2970 } else { 2971 a[r] = offdiagA[r]; 2972 idx[r] = cmap[offdiagIdx[r]]; 2973 } 2974 } 2975 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2976 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2977 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2978 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2979 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2980 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2986 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2987 { 2988 PetscErrorCode ierr; 2989 Mat *dummy; 2990 2991 PetscFunctionBegin; 2992 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2993 *newmat = *dummy; 2994 ierr = PetscFree(dummy);CHKERRQ(ierr); 2995 PetscFunctionReturn(0); 2996 } 2997 2998 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2999 3000 #undef __FUNCT__ 
3001 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3002 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3003 { 3004 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3005 PetscErrorCode ierr; 3006 3007 PetscFunctionBegin; 3008 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 3013 /* -------------------------------------------------------------------*/ 3014 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3015 MatGetRow_MPIAIJ, 3016 MatRestoreRow_MPIAIJ, 3017 MatMult_MPIAIJ, 3018 /* 4*/ MatMultAdd_MPIAIJ, 3019 MatMultTranspose_MPIAIJ, 3020 MatMultTransposeAdd_MPIAIJ, 3021 #ifdef PETSC_HAVE_PBGL 3022 MatSolve_MPIAIJ, 3023 #else 3024 0, 3025 #endif 3026 0, 3027 0, 3028 /*10*/ 0, 3029 0, 3030 0, 3031 MatSOR_MPIAIJ, 3032 MatTranspose_MPIAIJ, 3033 /*15*/ MatGetInfo_MPIAIJ, 3034 MatEqual_MPIAIJ, 3035 MatGetDiagonal_MPIAIJ, 3036 MatDiagonalScale_MPIAIJ, 3037 MatNorm_MPIAIJ, 3038 /*20*/ MatAssemblyBegin_MPIAIJ, 3039 MatAssemblyEnd_MPIAIJ, 3040 MatSetOption_MPIAIJ, 3041 MatZeroEntries_MPIAIJ, 3042 /*24*/ MatZeroRows_MPIAIJ, 3043 0, 3044 #ifdef PETSC_HAVE_PBGL 3045 0, 3046 #else 3047 0, 3048 #endif 3049 0, 3050 0, 3051 /*29*/ MatSetUp_MPIAIJ, 3052 #ifdef PETSC_HAVE_PBGL 3053 0, 3054 #else 3055 0, 3056 #endif 3057 0, 3058 0, 3059 0, 3060 /*34*/ MatDuplicate_MPIAIJ, 3061 0, 3062 0, 3063 0, 3064 0, 3065 /*39*/ MatAXPY_MPIAIJ, 3066 MatGetSubMatrices_MPIAIJ, 3067 MatIncreaseOverlap_MPIAIJ, 3068 MatGetValues_MPIAIJ, 3069 MatCopy_MPIAIJ, 3070 /*44*/ MatGetRowMax_MPIAIJ, 3071 MatScale_MPIAIJ, 3072 0, 3073 0, 3074 MatZeroRowsColumns_MPIAIJ, 3075 /*49*/ 0, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*54*/ MatFDColoringCreate_MPIAIJ, 3081 0, 3082 MatSetUnfactored_MPIAIJ, 3083 0, /* MatPermute_MPIAIJ, impl currently broken */ 3084 0, 3085 /*59*/ MatGetSubMatrix_MPIAIJ, 3086 MatDestroy_MPIAIJ, 3087 MatView_MPIAIJ, 3088 0, 3089 0, 3090 /*64*/ 0, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3096 MatGetRowMinAbs_MPIAIJ, 3097 0, 3098 MatSetColoring_MPIAIJ, 3099 #if defined(PETSC_HAVE_ADIC) 3100 MatSetValuesAdic_MPIAIJ, 3101 #else 3102 0, 3103 #endif 3104 MatSetValuesAdifor_MPIAIJ, 3105 /*75*/ MatFDColoringApply_AIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*80*/ 0, 3111 0, 3112 0, 3113 /*83*/ MatLoad_MPIAIJ, 3114 0, 3115 0, 3116 0, 3117 0, 3118 0, 3119 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3120 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3121 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3122 MatPtAP_Basic, 3123 MatPtAPSymbolic_MPIAIJ, 3124 /*94*/ MatPtAPNumeric_MPIAIJ, 3125 0, 3126 0, 3127 0, 3128 0, 3129 /*99*/ 0, 3130 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3131 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3132 MatConjugate_MPIAIJ, 3133 0, 3134 /*104*/MatSetValuesRow_MPIAIJ, 3135 MatRealPart_MPIAIJ, 3136 MatImaginaryPart_MPIAIJ, 3137 0, 3138 0, 3139 /*109*/0, 3140 MatGetRedundantMatrix_MPIAIJ, 3141 MatGetRowMin_MPIAIJ, 3142 0, 3143 0, 3144 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3145 0, 3146 0, 3147 0, 3148 0, 3149 /*119*/0, 3150 0, 3151 0, 3152 0, 3153 MatGetMultiProcBlock_MPIAIJ, 3154 /*124*/MatFindNonZeroRows_MPIAIJ, 3155 MatGetColumnNorms_MPIAIJ, 3156 MatInvertBlockDiagonal_MPIAIJ, 3157 0, 3158 MatGetSubMatricesParallel_MPIAIJ, 3159 /*129*/0, 3160 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3161 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3162 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3163 0, 3164 /*134*/0, 3165 0, 3166 0, 3167 0, 3168 0 3169 }; 3170 3171 /* ----------------------------------------------------------------------------------------*/ 3172 3173 
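/*
   MatStoreValues_MPIAIJ()/MatRetrieveValues_MPIAIJ() below simply forward to the
   diagonal (A) and off-diagonal (B) SeqAIJ blocks.  A minimal usage sketch of the
   public interface (this assumes the matrix is already assembled and that new
   nonzero locations have been disallowed, which MatStoreValues() requires):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify entries with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/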
EXTERN_C_BEGIN 3174 #undef __FUNCT__ 3175 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3176 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3177 { 3178 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3179 PetscErrorCode ierr; 3180 3181 PetscFunctionBegin; 3182 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3183 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3184 PetscFunctionReturn(0); 3185 } 3186 EXTERN_C_END 3187 3188 EXTERN_C_BEGIN 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3191 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3192 { 3193 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3198 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 EXTERN_C_END 3202 3203 EXTERN_C_BEGIN 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3206 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3207 { 3208 Mat_MPIAIJ *b; 3209 PetscErrorCode ierr; 3210 PetscInt i; 3211 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3212 3213 PetscFunctionBegin; 3214 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3215 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3216 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3217 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3218 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3219 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3220 3221 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3222 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3223 if (d_nnz) { 3224 for (i=0; i<B->rmap->n; i++) { 3225 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3226 } 3227 } 3228 if (o_nnz) { 3229 for (i=0; i<B->rmap->n; i++) { 3230 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3231 } 3232 } 3233 b = (Mat_MPIAIJ*)B->data; 3234 3235 if (!B->preallocated) { 3236 /* Explicitly create 2 MATSEQAIJ matrices. */ 3237 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3238 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3239 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3240 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3241 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3242 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3243 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3244 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3245 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3246 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3247 } 3248 3249 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3250 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3251 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3252 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3253 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3254 B->preallocated = PETSC_TRUE; 3255 PetscFunctionReturn(0); 3256 } 3257 EXTERN_C_END 3258 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3261 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3262 { 3263 Mat mat; 3264 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3265 PetscErrorCode ierr; 3266 3267 PetscFunctionBegin; 3268 *newmat = 0; 3269 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3270 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3271 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3272 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3273 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3274 a = (Mat_MPIAIJ*)mat->data; 3275 3276 mat->factortype = matin->factortype; 3277 mat->rmap->bs = matin->rmap->bs; 3278 mat->cmap->bs = matin->cmap->bs; 3279 mat->assembled = PETSC_TRUE; 3280 mat->insertmode = NOT_SET_VALUES; 3281 mat->preallocated = PETSC_TRUE; 3282 3283 a->size = oldmat->size; 3284 a->rank = oldmat->rank; 3285 a->donotstash = oldmat->donotstash; 3286 a->roworiented = oldmat->roworiented; 3287 a->rowindices = 0; 3288 a->rowvalues = 0; 3289 a->getrowactive = PETSC_FALSE; 3290 3291 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3292 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3293 3294 if (oldmat->colmap) { 3295 #if defined (PETSC_USE_CTABLE) 3296 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3297 #else 3298 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3299 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3300 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3301 #endif 3302 } else a->colmap = 0; 3303 if (oldmat->garray) { 3304 PetscInt len; 3305 len = oldmat->B->cmap->n; 3306 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3307 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3308 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3309 } else a->garray = 0; 3310 3311 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3312 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3313 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3314 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3315 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3317 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3318 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3319 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3320 *newmat = mat; 3321 PetscFunctionReturn(0); 3322 } 3323 3324 3325 3326 #undef __FUNCT__ 3327 #define __FUNCT__ "MatLoad_MPIAIJ" 3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3329 { 3330 PetscScalar *vals,*svals; 3331 MPI_Comm comm = ((PetscObject)viewer)->comm; 3332 PetscErrorCode ierr; 3333 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3334 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3335 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3336 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3337 PetscInt cend,cstart,n,*rowners,sizesset=1; 3338 int fd; 3339 PetscInt bs = 1; 3340 3341 PetscFunctionBegin; 3342 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3343 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3344 if (!rank) { 3345 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3346 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3347 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3348 } 3349 3350 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3351 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3352 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3353 3354 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3355 3356 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3357 M = header[1]; N = header[2]; 3358 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3359 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3360 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3361 3362 /* If global sizes are set, check if they are consistent with that given in the file */ 3363 if (sizesset) { 3364 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3365 } 3366 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3367 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3368 3369 /* determine ownership of all (block) rows */ 3370 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3371 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3372 else m = newMat->rmap->n; /* Set by user */ 3373 3374 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3375 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3376 3377 /* First process needs enough room for process with most rows */ 3378 if (!rank) { 3379 mmax = rowners[1]; 3380 for (i=2; i<size; i++) { 3381 mmax = PetscMax(mmax,rowners[i]); 3382 } 3383 } else mmax = m; 3384 3385 rowners[0] = 0; 3386 for (i=2; i<=size; i++) { 3387 rowners[i] += rowners[i-1]; 3388 } 3389 rstart = rowners[rank]; 3390 rend = rowners[rank+1]; 3391 3392 /* distribute row lengths to all processors */ 3393 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3394 if (!rank) { 3395 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3396 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3397 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3398 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3399 for (j=0; j<m; j++) { 3400 procsnz[0] += ourlens[j]; 3401 } 3402 for (i=1; i<size; i++) { 3403 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3404 /* calculate the number of nonzeros on each processor */ 3405 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3406 procsnz[i] += 
rowlengths[j]; 3407 } 3408 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3409 } 3410 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3411 } else { 3412 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3413 } 3414 3415 if (!rank) { 3416 /* determine max buffer needed and allocate it */ 3417 maxnz = 0; 3418 for (i=0; i<size; i++) { 3419 maxnz = PetscMax(maxnz,procsnz[i]); 3420 } 3421 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3422 3423 /* read in my part of the matrix column indices */ 3424 nz = procsnz[0]; 3425 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3426 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3427 3428 /* read in every one elses and ship off */ 3429 for (i=1; i<size; i++) { 3430 nz = procsnz[i]; 3431 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3432 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3433 } 3434 ierr = PetscFree(cols);CHKERRQ(ierr); 3435 } else { 3436 /* determine buffer space needed for message */ 3437 nz = 0; 3438 for (i=0; i<m; i++) { 3439 nz += ourlens[i]; 3440 } 3441 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3442 3443 /* receive message of column indices*/ 3444 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3445 } 3446 3447 /* determine column ownership if matrix is not square */ 3448 if (N != M) { 3449 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3450 else n = newMat->cmap->n; 3451 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3452 cstart = cend - n; 3453 } else { 3454 cstart = rstart; 3455 cend = rend; 3456 n = cend - cstart; 3457 } 3458 3459 /* loop over local rows, determining number of off diagonal entries */ 3460 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3461 jj = 0; 3462 for (i=0; i<m; i++) { 3463 for (j=0; j<ourlens[i]; j++) { 3464 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3465 jj++; 3466 } 3467 } 3468 3469 for (i=0; i<m; i++) { 3470 ourlens[i] -= offlens[i]; 3471 } 3472 if (!sizesset) { 3473 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3474 } 3475 3476 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3477 3478 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3479 3480 for (i=0; i<m; i++) { 3481 ourlens[i] += offlens[i]; 3482 } 3483 3484 if (!rank) { 3485 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3486 3487 /* read in my part of the matrix numerical values */ 3488 nz = procsnz[0]; 3489 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3490 3491 /* insert into matrix */ 3492 jj = rstart; 3493 smycols = mycols; 3494 svals = vals; 3495 for (i=0; i<m; i++) { 3496 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3497 smycols += ourlens[i]; 3498 svals += ourlens[i]; 3499 jj++; 3500 } 3501 3502 /* read in other processors and ship out */ 3503 for (i=1; i<size; i++) { 3504 nz = procsnz[i]; 3505 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3506 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3507 } 3508 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3509 } else { 3510 /* receive numeric values */ 3511 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3512 3513 /* receive message of values*/ 3514 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3515 3516 /* 
insert into matrix */ 3517 jj = rstart; 3518 smycols = mycols; 3519 svals = vals; 3520 for (i=0; i<m; i++) { 3521 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3522 smycols += ourlens[i]; 3523 svals += ourlens[i]; 3524 jj++; 3525 } 3526 } 3527 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3528 ierr = PetscFree(vals);CHKERRQ(ierr); 3529 ierr = PetscFree(mycols);CHKERRQ(ierr); 3530 ierr = PetscFree(rowners);CHKERRQ(ierr); 3531 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3532 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3533 3534 PetscFunctionReturn(0); 3535 } 3536 3537 #undef __FUNCT__ 3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3539 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3540 { 3541 PetscErrorCode ierr; 3542 IS iscol_local; 3543 PetscInt csize; 3544 3545 PetscFunctionBegin; 3546 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3547 if (call == MAT_REUSE_MATRIX) { 3548 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3549 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3550 } else { 3551 PetscInt cbs; 3552 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3553 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3554 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3555 } 3556 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3557 if (call == MAT_INITIAL_MATRIX) { 3558 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3559 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3560 } 3561 PetscFunctionReturn(0); 3562 } 3563 3564 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3567 /* 3568 Not great since it makes two copies of the submatrix, first an SeqAIJ 3569 in local and then by concatenating the local matrices the end result. 3570 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3571 3572 Note: This requires a sequential iscol with all indices. 
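      A caller normally reaches this routine through the public MatGetSubMatrix()
      interface, e.g. (a sketch; isrow and iscol are index sets chosen by the caller):
          ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
      MatGetSubMatrix_MPIAIJ() above gathers iscol onto every process with
      ISAllGather() before calling this private routine with the resulting
      sequential index set.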
3573 */ 3574 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3575 { 3576 PetscErrorCode ierr; 3577 PetscMPIInt rank,size; 3578 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3579 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3580 PetscBool allcolumns, colflag; 3581 Mat M,Mreuse; 3582 MatScalar *vwork,*aa; 3583 MPI_Comm comm = ((PetscObject)mat)->comm; 3584 Mat_SeqAIJ *aij; 3585 3586 3587 PetscFunctionBegin; 3588 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3590 3591 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3592 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3593 if (colflag && ncol == mat->cmap->N){ 3594 allcolumns = PETSC_TRUE; 3595 } else { 3596 allcolumns = PETSC_FALSE; 3597 } 3598 if (call == MAT_REUSE_MATRIX) { 3599 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3600 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3601 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3602 } else { 3603 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3604 } 3605 3606 /* 3607 m - number of local rows 3608 n - number of columns (same on all processors) 3609 rstart - first row in new global matrix generated 3610 */ 3611 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3612 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3613 if (call == MAT_INITIAL_MATRIX) { 3614 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3615 ii = aij->i; 3616 jj = aij->j; 3617 3618 /* 3619 Determine the number of non-zeros in the diagonal and off-diagonal 3620 portions of the matrix in order to do correct preallocation 3621 */ 3622 3623 /* first get start and end of "diagonal" columns */ 3624 if (csize == PETSC_DECIDE) { 3625 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3626 if (mglobal == n) { /* square matrix */ 3627 nlocal = m; 3628 } else { 3629 nlocal = n/size + ((n % size) > rank); 3630 } 3631 } else { 3632 nlocal = csize; 3633 } 3634 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3635 rstart = rend - nlocal; 3636 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3637 3638 /* next, compute all the lengths */ 3639 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3640 olens = dlens + m; 3641 for (i=0; i<m; i++) { 3642 jend = ii[i+1] - ii[i]; 3643 olen = 0; 3644 dlen = 0; 3645 for (j=0; j<jend; j++) { 3646 if (*jj < rstart || *jj >= rend) olen++; 3647 else dlen++; 3648 jj++; 3649 } 3650 olens[i] = olen; 3651 dlens[i] = dlen; 3652 } 3653 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3654 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3655 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3656 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3657 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3658 ierr = PetscFree(dlens);CHKERRQ(ierr); 3659 } else { 3660 PetscInt ml,nl; 3661 3662 M = *newmat; 3663 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3664 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3665 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3666 /* 3667 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3668 rather than the slower MatSetValues().
3669 */
3670 M->was_assembled = PETSC_TRUE;
3671 M->assembled = PETSC_FALSE;
3672 }
3673 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3674 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3675 ii = aij->i;
3676 jj = aij->j;
3677 aa = aij->a;
3678 for (i=0; i<m; i++) {
3679 row = rstart + i;
3680 nz = ii[i+1] - ii[i];
3681 cwork = jj; jj += nz;
3682 vwork = aa; aa += nz;
3683 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3684 }
3685
3686 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688 *newmat = M;
3689
3690 /* save submatrix used in processor for next request */
3691 if (call == MAT_INITIAL_MATRIX) {
3692 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3693 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3694 }
3695
3696 PetscFunctionReturn(0);
3697 }
3698
3699 EXTERN_C_BEGIN
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3702 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3703 {
3704 PetscInt m,cstart, cend,j,nnz,i,d;
3705 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3706 const PetscInt *JJ;
3707 PetscScalar *values;
3708 PetscErrorCode ierr;
3709
3710 PetscFunctionBegin;
3711 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3712
3713 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3714 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3715 m = B->rmap->n;
3716 cstart = B->cmap->rstart;
3717 cend = B->cmap->rend;
3718 rstart = B->rmap->rstart;
3719
3720 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3721
3722 #if defined(PETSC_USE_DEBUG)
3723 for (i=0; i<m; i++) {
3724 nnz = Ii[i+1]- Ii[i];
3725 JJ = J + Ii[i];
3726 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3727 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3728 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3729 }
3730 #endif
3731
3732 for (i=0; i<m; i++) {
3733 nnz = Ii[i+1]- Ii[i];
3734 JJ = J + Ii[i];
3735 nnz_max = PetscMax(nnz_max,nnz);
3736 d = 0;
3737 for (j=0; j<nnz; j++) {
3738 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3739 }
3740 d_nnz[i] = d;
3741 o_nnz[i] = nnz - d;
3742 }
3743 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3744 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3745
3746 if (v) values = (PetscScalar*)v;
3747 else {
3748 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3749 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3750 }
3751
3752 for (i=0; i<m; i++) {
3753 ii = i + rstart;
3754 nnz = Ii[i+1]- Ii[i];
3755 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3756 } 3757 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3758 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3759 3760 if (!v) { 3761 ierr = PetscFree(values);CHKERRQ(ierr); 3762 } 3763 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3764 PetscFunctionReturn(0); 3765 } 3766 EXTERN_C_END 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3770 /*@ 3771 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3772 (the default parallel PETSc format). 3773 3774 Collective on MPI_Comm 3775 3776 Input Parameters: 3777 + B - the matrix 3778 . i - the indices into j for the start of each local row (starts with zero) 3779 . j - the column indices for each local row (starts with zero) 3780 - v - optional values in the matrix 3781 3782 Level: developer 3783 3784 Notes: 3785 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3786 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3787 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3788 3789 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3790 3791 The format which is used for the sparse matrix input, is equivalent to a 3792 row-major ordering.. i.e for the following matrix, the input data expected is 3793 as shown: 3794 3795 1 0 0 3796 2 0 3 P0 3797 ------- 3798 4 5 6 P1 3799 3800 Process0 [P0]: rows_owned=[0,1] 3801 i = {0,1,3} [size = nrow+1 = 2+1] 3802 j = {0,0,2} [size = nz = 6] 3803 v = {1,2,3} [size = nz = 6] 3804 3805 Process1 [P1]: rows_owned=[2] 3806 i = {0,3} [size = nrow+1 = 1+1] 3807 j = {0,1,2} [size = nz = 6] 3808 v = {4,5,6} [size = nz = 6] 3809 3810 .keywords: matrix, aij, compressed row, sparse, parallel 3811 3812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3813 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3814 @*/ 3815 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3816 { 3817 PetscErrorCode ierr; 3818 3819 PetscFunctionBegin; 3820 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3821 PetscFunctionReturn(0); 3822 } 3823 3824 #undef __FUNCT__ 3825 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3826 /*@C 3827 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3828 (the default parallel PETSc format). For good matrix assembly performance 3829 the user should preallocate the matrix storage by setting the parameters 3830 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3831 performance can be increased by more than a factor of 50. 3832 3833 Collective on MPI_Comm 3834 3835 Input Parameters: 3836 + A - the matrix 3837 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3838 (same value is used for all local rows) 3839 . d_nnz - array containing the number of nonzeros in the various rows of the 3840 DIAGONAL portion of the local submatrix (possibly different for each row) 3841 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3842 The size of this array is equal to the number of local rows, i.e 'm'. 
3843 For matrices that will be factored, you must leave room for (and set) 3844 the diagonal entry even if it is zero. 3845 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3846 submatrix (same value is used for all local rows). 3847 - o_nnz - array containing the number of nonzeros in the various rows of the 3848 OFF-DIAGONAL portion of the local submatrix (possibly different for 3849 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3850 structure. The size of this array is equal to the number 3851 of local rows, i.e 'm'. 3852 3853 If the *_nnz parameter is given then the *_nz parameter is ignored 3854 3855 The AIJ format (also called the Yale sparse matrix format or 3856 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3857 storage. The stored row and column indices begin with zero. 3858 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3859 3860 The parallel matrix is partitioned such that the first m0 rows belong to 3861 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3862 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3863 3864 The DIAGONAL portion of the local submatrix of a processor can be defined 3865 as the submatrix which is obtained by extraction the part corresponding to 3866 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3867 first row that belongs to the processor, r2 is the last row belonging to 3868 the this processor, and c1-c2 is range of indices of the local part of a 3869 vector suitable for applying the matrix to. This is an mxn matrix. In the 3870 common case of a square matrix, the row and column ranges are the same and 3871 the DIAGONAL part is also square. The remaining portion of the local 3872 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3873 3874 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3875 3876 You can call MatGetInfo() to get information on how effective the preallocation was; 3877 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3878 You can also run with the option -info and look for messages with the string 3879 malloc in them to see if additional memory allocation was needed. 3880 3881 Example usage: 3882 3883 Consider the following 8x8 matrix with 34 non-zero values, that is 3884 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3885 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3886 as follows: 3887 3888 .vb 3889 1 2 0 | 0 3 0 | 0 4 3890 Proc0 0 5 6 | 7 0 0 | 8 0 3891 9 0 10 | 11 0 0 | 12 0 3892 ------------------------------------- 3893 13 0 14 | 15 16 17 | 0 0 3894 Proc1 0 18 0 | 19 20 21 | 0 0 3895 0 0 0 | 22 23 0 | 24 0 3896 ------------------------------------- 3897 Proc2 25 26 27 | 0 0 28 | 29 0 3898 30 0 0 | 31 32 33 | 0 34 3899 .ve 3900 3901 This can be represented as a collection of submatrices as: 3902 3903 .vb 3904 A B C 3905 D E F 3906 G H I 3907 .ve 3908 3909 Where the submatrices A,B,C are owned by proc0, D,E,F are 3910 owned by proc1, G,H,I are owned by proc2. 3911 3912 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3913 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3914 The 'M','N' parameters are 8,8, and have the same values on all procs. 3915 3916 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3917 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3918 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3919 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3920 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3921 matrix, ans [DF] as another SeqAIJ matrix. 3922 3923 When d_nz, o_nz parameters are specified, d_nz storage elements are 3924 allocated for every row of the local diagonal submatrix, and o_nz 3925 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3926 One way to choose d_nz and o_nz is to use the max nonzerors per local 3927 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3928 In this case, the values of d_nz,o_nz are: 3929 .vb 3930 proc0 : dnz = 2, o_nz = 2 3931 proc1 : dnz = 3, o_nz = 2 3932 proc2 : dnz = 1, o_nz = 4 3933 .ve 3934 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3935 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3936 for proc3. i.e we are using 12+15+10=37 storage locations to store 3937 34 values. 3938 3939 When d_nnz, o_nnz parameters are specified, the storage is specified 3940 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3941 In the above case the values for d_nnz,o_nnz are: 3942 .vb 3943 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3944 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3945 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3946 .ve 3947 Here the space allocated is sum of all the above values i.e 34, and 3948 hence pre-allocation is perfect. 3949 3950 Level: intermediate 3951 3952 .keywords: matrix, aij, compressed row, sparse, parallel 3953 3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3955 MPIAIJ, MatGetInfo() 3956 @*/ 3957 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3958 { 3959 PetscErrorCode ierr; 3960 3961 PetscFunctionBegin; 3962 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3963 PetscValidType(B,1); 3964 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3965 PetscFunctionReturn(0); 3966 } 3967 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3970 /*@ 3971 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3972 CSR format the local rows. 3973 3974 Collective on MPI_Comm 3975 3976 Input Parameters: 3977 + comm - MPI communicator 3978 . m - number of local rows (Cannot be PETSC_DECIDE) 3979 . n - This value should be the same as the local size used in creating the 3980 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3981 calculated if N is given) For square matrices n is almost always m. 3982 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3983 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3984 . i - row indices 3985 . j - column indices 3986 - a - matrix values 3987 3988 Output Parameter: 3989 . mat - the matrix 3990 3991 Level: intermediate 3992 3993 Notes: 3994 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3995 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3996 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
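       For the layout shown in the example below, the call on each process would
       look like the following (a sketch; error checking is omitted):
.vb
       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
       with the local row count m equal to 2 on process 0 and 1 on process 1.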
3997 3998 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3999 4000 The format which is used for the sparse matrix input, is equivalent to a 4001 row-major ordering.. i.e for the following matrix, the input data expected is 4002 as shown: 4003 4004 1 0 0 4005 2 0 3 P0 4006 ------- 4007 4 5 6 P1 4008 4009 Process0 [P0]: rows_owned=[0,1] 4010 i = {0,1,3} [size = nrow+1 = 2+1] 4011 j = {0,0,2} [size = nz = 6] 4012 v = {1,2,3} [size = nz = 6] 4013 4014 Process1 [P1]: rows_owned=[2] 4015 i = {0,3} [size = nrow+1 = 1+1] 4016 j = {0,1,2} [size = nz = 6] 4017 v = {4,5,6} [size = nz = 6] 4018 4019 .keywords: matrix, aij, compressed row, sparse, parallel 4020 4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4022 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4023 @*/ 4024 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4025 { 4026 PetscErrorCode ierr; 4027 4028 PetscFunctionBegin; 4029 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4030 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4031 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4032 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4033 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4034 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4035 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4036 PetscFunctionReturn(0); 4037 } 4038 4039 #undef __FUNCT__ 4040 #define __FUNCT__ "MatCreateAIJ" 4041 /*@C 4042 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4043 (the default parallel PETSc format). For good matrix assembly performance 4044 the user should preallocate the matrix storage by setting the parameters 4045 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4046 performance can be increased by more than a factor of 50. 4047 4048 Collective on MPI_Comm 4049 4050 Input Parameters: 4051 + comm - MPI communicator 4052 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4053 This value should be the same as the local size used in creating the 4054 y vector for the matrix-vector product y = Ax. 4055 . n - This value should be the same as the local size used in creating the 4056 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4057 calculated if N is given) For square matrices n is almost always m. 4058 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4059 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4060 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4061 (same value is used for all local rows) 4062 . d_nnz - array containing the number of nonzeros in the various rows of the 4063 DIAGONAL portion of the local submatrix (possibly different for each row) 4064 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4065 The size of this array is equal to the number of local rows, i.e 'm'. 4066 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4067 submatrix (same value is used for all local rows). 
4068 - o_nnz - array containing the number of nonzeros in the various rows of the 4069 OFF-DIAGONAL portion of the local submatrix (possibly different for 4070 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4071 structure. The size of this array is equal to the number 4072 of local rows, i.e 'm'. 4073 4074 Output Parameter: 4075 . A - the matrix 4076 4077 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4078 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4079 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4080 4081 Notes: 4082 If the *_nnz parameter is given then the *_nz parameter is ignored 4083 4084 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4085 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4086 storage requirements for this matrix. 4087 4088 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4089 processor than it must be used on all processors that share the object for 4090 that argument. 4091 4092 The user MUST specify either the local or global matrix dimensions 4093 (possibly both). 4094 4095 The parallel matrix is partitioned across processors such that the 4096 first m0 rows belong to process 0, the next m1 rows belong to 4097 process 1, the next m2 rows belong to process 2 etc.. where 4098 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4099 values corresponding to [m x N] submatrix. 4100 4101 The columns are logically partitioned with the n0 columns belonging 4102 to 0th partition, the next n1 columns belonging to the next 4103 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4104 4105 The DIAGONAL portion of the local submatrix on any given processor 4106 is the submatrix corresponding to the rows and columns m,n 4107 corresponding to the given processor. i.e diagonal matrix on 4108 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4109 etc. The remaining portion of the local submatrix [m x (N-n)] 4110 constitute the OFF-DIAGONAL portion. The example below better 4111 illustrates this concept. 4112 4113 For a square global matrix we define each processor's diagonal portion 4114 to be its local rows and the corresponding columns (a square submatrix); 4115 each processor's off-diagonal portion encompasses the remainder of the 4116 local matrix (a rectangular submatrix). 4117 4118 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4119 4120 When calling this routine with a single process communicator, a matrix of 4121 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4122 type of communicator, use the construction mechanism: 4123 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4124 4125 By default, this format uses inodes (identical nodes) when possible. 4126 We search for consecutive rows with the same nonzero structure, thereby 4127 reusing matrix information to achieve increased efficiency. 4128 4129 Options Database Keys: 4130 + -mat_no_inode - Do not use inodes 4131 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4132 - -mat_aij_oneindex - Internally use indexing starting at 1 4133 rather than 0. Note that when calling MatSetValues(), 4134 the user still MUST index entries starting at 0! 4135 4136 4137 Example usage: 4138 4139 Consider the following 8x8 matrix with 34 non-zero values, that is 4140 assembled across 3 processors. 
Let us assume that proc0 owns 3 rows,
4141 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4142 as follows:
4143
4144 .vb
4145 1 2 0 | 0 3 0 | 0 4
4146 Proc0 0 5 6 | 7 0 0 | 8 0
4147 9 0 10 | 11 0 0 | 12 0
4148 -------------------------------------
4149 13 0 14 | 15 16 17 | 0 0
4150 Proc1 0 18 0 | 19 20 21 | 0 0
4151 0 0 0 | 22 23 0 | 24 0
4152 -------------------------------------
4153 Proc2 25 26 27 | 0 0 28 | 29 0
4154 30 0 0 | 31 32 33 | 0 34
4155 .ve
4156
4157 This can be represented as a collection of submatrices as:
4158
4159 .vb
4160 A B C
4161 D E F
4162 G H I
4163 .ve
4164
4165 Where the submatrices A,B,C are owned by proc0, D,E,F are
4166 owned by proc1, G,H,I are owned by proc2.
4167
4168 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4169 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4170 The 'M','N' parameters are 8,8, and have the same values on all procs.
4171
4172 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4173 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4174 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4175 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4176 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4177 matrix, and [DF] as another SeqAIJ matrix.
4178
4179 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4180 allocated for every row of the local diagonal submatrix, and o_nz
4181 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4182 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4183 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4184 In this case, the values of d_nz,o_nz are:
4185 .vb
4186 proc0 : d_nz = 2, o_nz = 2
4187 proc1 : d_nz = 3, o_nz = 2
4188 proc2 : d_nz = 1, o_nz = 4
4189 .ve
4190 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4191 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4192 for proc2, i.e. we are using 12+15+10=37 storage locations to store
4193 34 values.
4194
4195 When the d_nnz, o_nnz parameters are specified, the storage is specified
4196 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4197 In the above case the values for d_nnz,o_nnz are:
4198 .vb
4199 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4200 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4201 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4202 .ve
4203 Here the space allocated is the sum of all of the above values, i.e. 34, and
4204 hence pre-allocation is perfect.
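   A typical creation sequence with this routine is (a minimal sketch; the
   preallocation counts and the row/ncols/cols/vals variables holding the
   caller's data are illustrative only):
.vb
     Mat A;
     ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve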
4205 4206 Level: intermediate 4207 4208 .keywords: matrix, aij, compressed row, sparse, parallel 4209 4210 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4211 MPIAIJ, MatCreateMPIAIJWithArrays() 4212 @*/ 4213 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4214 { 4215 PetscErrorCode ierr; 4216 PetscMPIInt size; 4217 4218 PetscFunctionBegin; 4219 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4220 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4221 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4222 if (size > 1) { 4223 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4224 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4225 } else { 4226 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4227 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4228 } 4229 PetscFunctionReturn(0); 4230 } 4231 4232 #undef __FUNCT__ 4233 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4234 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4235 { 4236 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4237 4238 PetscFunctionBegin; 4239 *Ad = a->A; 4240 *Ao = a->B; 4241 *colmap = a->garray; 4242 PetscFunctionReturn(0); 4243 } 4244 4245 #undef __FUNCT__ 4246 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4247 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4248 { 4249 PetscErrorCode ierr; 4250 PetscInt i; 4251 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4252 4253 PetscFunctionBegin; 4254 if (coloring->ctype == IS_COLORING_GLOBAL) { 4255 ISColoringValue *allcolors,*colors; 4256 ISColoring ocoloring; 4257 4258 /* set coloring for diagonal portion */ 4259 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4260 4261 /* set coloring for off-diagonal portion */ 4262 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4263 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4264 for (i=0; i<a->B->cmap->n; i++) { 4265 colors[i] = allcolors[a->garray[i]]; 4266 } 4267 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4268 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4269 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4270 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4271 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4272 ISColoringValue *colors; 4273 PetscInt *larray; 4274 ISColoring ocoloring; 4275 4276 /* set coloring for diagonal portion */ 4277 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4278 for (i=0; i<a->A->cmap->n; i++) { 4279 larray[i] = i + A->cmap->rstart; 4280 } 4281 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4282 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4283 for (i=0; i<a->A->cmap->n; i++) { 4284 colors[i] = coloring->colors[larray[i]]; 4285 } 4286 ierr = PetscFree(larray);CHKERRQ(ierr); 4287 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4288 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4289 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4290 4291 /* set coloring for off-diagonal portion */ 4292 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4293 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4294 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4295 for (i=0; i<a->B->cmap->n; i++) { 4296 colors[i] = coloring->colors[larray[i]]; 4297 } 4298 ierr = PetscFree(larray);CHKERRQ(ierr); 4299 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4300 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4301 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4302 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4303 4304 PetscFunctionReturn(0); 4305 } 4306 4307 #if defined(PETSC_HAVE_ADIC) 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4310 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4311 { 4312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4313 PetscErrorCode ierr; 4314 4315 PetscFunctionBegin; 4316 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4317 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4318 PetscFunctionReturn(0); 4319 } 4320 #endif 4321 4322 #undef __FUNCT__ 4323 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4324 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4325 { 4326 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4327 PetscErrorCode ierr; 4328 4329 PetscFunctionBegin; 4330 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4331 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4332 PetscFunctionReturn(0); 4333 } 4334 4335 #undef __FUNCT__ 4336 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4337 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4338 { 4339 PetscErrorCode ierr; 4340 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4341 PetscInt *indx; 4342 4343 PetscFunctionBegin; 4344 /* This routine will ONLY return MPIAIJ type matrix */ 4345 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4346 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4347 if (n == PETSC_DECIDE){ 4348 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4349 } 4350 /* Check sum(n) = N */ 4351 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4352 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4353 4354 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4355 rstart -= m; 4356 4357 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4358 for (i=0;i<m;i++) { 4359 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4360 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4361 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4362 } 4363 4364 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4365 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4366 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4367 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4368 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4369 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4370 PetscFunctionReturn(0); 4371 } 4372 4373 #undef __FUNCT__ 4374 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4375 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4376 { 4377 PetscErrorCode ierr; 4378 PetscInt 
m,N,i,rstart,nnz,Ii; 4379 PetscInt *indx; 4380 PetscScalar *values; 4381 4382 PetscFunctionBegin; 4383 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4384 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4385 for (i=0;i<m;i++) { 4386 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4387 Ii = i + rstart; 4388 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4389 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4390 } 4391 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4392 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4393 PetscFunctionReturn(0); 4394 } 4395 4396 #undef __FUNCT__ 4397 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4398 /*@ 4399 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4400 matrices from each processor 4401 4402 Collective on MPI_Comm 4403 4404 Input Parameters: 4405 + comm - the communicators the parallel matrix will live on 4406 . inmat - the input sequential matrices 4407 . n - number of local columns (or PETSC_DECIDE) 4408 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4409 4410 Output Parameter: 4411 . outmat - the parallel matrix generated 4412 4413 Level: advanced 4414 4415 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4416 4417 @*/ 4418 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4419 { 4420 PetscErrorCode ierr; 4421 4422 PetscFunctionBegin; 4423 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4424 if (scall == MAT_INITIAL_MATRIX){ 4425 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4426 } 4427 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4428 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4429 PetscFunctionReturn(0); 4430 } 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "MatFileSplit" 4434 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4435 { 4436 PetscErrorCode ierr; 4437 PetscMPIInt rank; 4438 PetscInt m,N,i,rstart,nnz; 4439 size_t len; 4440 const PetscInt *indx; 4441 PetscViewer out; 4442 char *name; 4443 Mat B; 4444 const PetscScalar *values; 4445 4446 PetscFunctionBegin; 4447 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4448 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4449 /* Should this be the type of the diagonal block of A? 
*/ 4450 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4451 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4452 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4453 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4454 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4455 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4456 for (i=0;i<m;i++) { 4457 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4458 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4459 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4460 } 4461 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4463 4464 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4465 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4466 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4467 sprintf(name,"%s.%d",outfile,rank); 4468 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4469 ierr = PetscFree(name); 4470 ierr = MatView(B,out);CHKERRQ(ierr); 4471 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4472 ierr = MatDestroy(&B);CHKERRQ(ierr); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4479 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4480 { 4481 PetscErrorCode ierr; 4482 Mat_Merge_SeqsToMPI *merge; 4483 PetscContainer container; 4484 4485 PetscFunctionBegin; 4486 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4487 if (container) { 4488 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4501 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4502 ierr = PetscFree(merge);CHKERRQ(ierr); 4503 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4504 } 4505 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4506 PetscFunctionReturn(0); 4507 } 4508 4509 #include <../src/mat/utils/freespace.h> 4510 #include <petscbt.h> 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4514 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4515 { 4516 PetscErrorCode ierr; 4517 MPI_Comm comm=((PetscObject)mpimat)->comm; 4518 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4519 PetscMPIInt size,rank,taga,*len_s; 4520 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4521 PetscInt proc,m; 4522 PetscInt **buf_ri,**buf_rj; 4523 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4524 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4525 MPI_Request *s_waits,*r_waits; 4526 MPI_Status *status; 4527 MatScalar *aa=a->a; 4528 MatScalar **abuf_r,*ba_i; 4529 Mat_Merge_SeqsToMPI *merge; 4530 PetscContainer container; 4531 4532 PetscFunctionBegin; 4533 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4534 4535 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4536 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4537 4538 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4539 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4540 4541 bi = merge->bi; 4542 bj = merge->bj; 4543 buf_ri = merge->buf_ri; 4544 buf_rj = merge->buf_rj; 4545 4546 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4547 owners = merge->rowmap->range; 4548 len_s = merge->len_s; 4549 4550 /* send and recv matrix values */ 4551 /*-----------------------------*/ 4552 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4553 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4554 4555 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4556 for (proc=0,k=0; proc<size; proc++){ 4557 if (!len_s[proc]) continue; 4558 i = owners[proc]; 4559 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4560 k++; 4561 } 4562 4563 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4564 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4565 ierr = PetscFree(status);CHKERRQ(ierr); 4566 4567 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4568 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4569 4570 /* insert mat values of mpimat */ 4571 /*----------------------------*/ 4572 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4573 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4574 4575 for (k=0; k<merge->nrecv; k++){ 4576 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4577 nrows = *(buf_ri_k[k]); 4578 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4579 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4580 } 4581 4582 /* set values of ba */ 4583 m = merge->rowmap->n; 4584 for (i=0; i<m; i++) { 4585 arow = owners[rank] + i; 4586 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4587 bnzi = bi[i+1] - bi[i]; 4588 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4589 4590 /* add local non-zero vals of this proc's seqmat into ba */ 4591 anzi = ai[arow+1] - ai[arow]; 4592 aj = a->j + ai[arow]; 4593 aa = a->a + ai[arow]; 4594 nextaj = 0; 4595 for (j=0; nextaj<anzi; j++){ 4596 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4597 ba_i[j] += aa[nextaj++]; 4598 } 4599 } 4600 4601 /* add received vals into ba */ 4602 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4603 /* i-th row */ 4604 if (i == *nextrow[k]) { 4605 anzi = *(nextai[k]+1) - *nextai[k]; 4606 aj = buf_rj[k] + *(nextai[k]); 4607 aa = abuf_r[k] + *(nextai[k]); 4608 nextaj = 0; 4609 for (j=0; nextaj<anzi; j++){ 4610 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4611 ba_i[j] += aa[nextaj++]; 4612 } 4613 } 4614 nextrow[k]++; nextai[k]++; 4615 } 4616 } 4617 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4618 } 4619 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4621 4622 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4623 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4624 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4625 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4626 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4627 PetscFunctionReturn(0); 4628 } 4629 4630 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4631 4632 #undef __FUNCT__ 4633 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4634 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4635 { 4636 PetscErrorCode ierr; 4637 Mat B_mpi; 4638 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4639 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4640 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4641 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4642 PetscInt len,proc,*dnz,*onz,bs,cbs; 4643 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4644 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4645 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4646 MPI_Status *status; 4647 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4648 PetscBT lnkbt; 4649 Mat_Merge_SeqsToMPI *merge; 4650 PetscContainer container; 4651 4652 PetscFunctionBegin; 4653 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4654 4655 /* make sure it is a PETSc comm */ 4656 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4658 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4659 4660 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4661 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4662 4663 /* determine row ownership */ 4664 /*---------------------------------------------------------*/ 4665 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4670 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4671 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4672 4673 m = merge->rowmap->n; 4674 M = merge->rowmap->N; 4675 owners = merge->rowmap->range; 4676 4677 /* determine the number of messages to send, their lengths */ 4678 /*---------------------------------------------------------*/ 4679 len_s = merge->len_s; 4680 4681 len = 0; /* length of buf_si[] */ 4682 merge->nsend = 0; 4683 for (proc=0; proc<size; proc++){ 4684 len_si[proc] = 0; 4685 if (proc == rank){ 4686 len_s[proc] = 0; 4687 } else { 4688 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4689 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4690 } 4691 if (len_s[proc]) { 4692 merge->nsend++; 4693 nrows = 0; 4694 for (i=owners[proc]; i<owners[proc+1]; i++){ 4695 if (ai[i+1] > ai[i]) nrows++; 4696 } 4697 len_si[proc] = 2*(nrows+1); 4698 len += len_si[proc]; 4699 } 4700 } 4701 4702 /* determine the number and length of messages to receive for ij-structure */ 4703 /*-------------------------------------------------------------------------*/ 4704 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4705 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4706 4707 /* post the Irecv of j-structure */ 4708 /*-------------------------------*/ 4709 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4710 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4711 4712 /* post the Isend of j-structure */ 4713 /*--------------------------------*/ 4714 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4715 4716 for (proc=0, k=0; proc<size; proc++){ 4717 if (!len_s[proc]) continue; 4718 i = owners[proc]; 4719 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4720 k++; 4721 } 4722 4723 /* receives and sends of j-structure are complete */ 4724 /*------------------------------------------------*/ 4725 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4726 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4727 4728 /* send and recv i-structure */ 4729 /*---------------------------*/ 4730 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4731 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4732 4733 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4734 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4735 for (proc=0,k=0; proc<size; proc++){ 4736 if (!len_s[proc]) continue; 4737 /* form outgoing message for i-structure: 4738 buf_si[0]: nrows to be sent 4739 [1:nrows]: row index (global) 4740 [nrows+1:2*nrows+1]: i-structure index 4741 */ 4742 /*-------------------------------------------*/ 4743 nrows = len_si[proc]/2 - 1; 4744 buf_si_i = buf_si + nrows+1; 4745 buf_si[0] = nrows; 4746 buf_si_i[0] = 0; 4747 nrows = 0; 4748 for (i=owners[proc]; i<owners[proc+1]; i++){ 4749 anzi = ai[i+1] - ai[i]; 4750 if (anzi) { 4751 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4752 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4753 nrows++; 4754 } 4755 } 4756 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4757 k++; 4758 buf_si += len_si[proc]; 4759 } 4760 4761 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4762 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4763 4764 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4765 for (i=0; i<merge->nrecv; i++){ 4766 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4767 } 4768 4769 ierr = PetscFree(len_si);CHKERRQ(ierr); 4770 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4771 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4772 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4773 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4774 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4775 ierr = PetscFree(status);CHKERRQ(ierr); 4776 4777 /* compute a local seq matrix in each processor */ 4778 /*----------------------------------------------*/ 4779 /* allocate bi array and free space for accumulating nonzero column info */ 4780 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4781 bi[0] = 0; 4782 4783 /* create and initialize a linked list */ 4784 nlnk = N+1; 4785 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4786 4787 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4788 len = 0; 4789 len = ai[owners[rank+1]] - ai[owners[rank]]; 4790 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4791 current_space = free_space; 4792 4793 /* determine 
symbolic info for each local row */
4794   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4795
4796   for (k=0; k<merge->nrecv; k++){
4797     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4798     nrows       = *buf_ri_k[k];
4799     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4800     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4801   }
4802
4803   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4804   len  = 0;
4805   for (i=0;i<m;i++) {
4806     bnzi = 0;
4807     /* add local non-zero cols of this proc's seqmat into lnk */
4808     arow = owners[rank] + i;
4809     anzi = ai[arow+1] - ai[arow];
4810     aj   = a->j + ai[arow];
4811     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4812     bnzi += nlnk;
4813     /* add received col data into lnk */
4814     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4815       if (i == *nextrow[k]) { /* i-th row */
4816         anzi = *(nextai[k]+1) - *nextai[k];
4817         aj   = buf_rj[k] + *nextai[k];
4818         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4819         bnzi += nlnk;
4820         nextrow[k]++; nextai[k]++;
4821       }
4822     }
4823     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4824
4825     /* if free space is not available, make more free space */
4826     if (current_space->local_remaining<bnzi) {
4827       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4828       nspacedouble++;
4829     }
4830     /* copy data into free space, then initialize lnk */
4831     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4832     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4833
4834     current_space->array           += bnzi;
4835     current_space->local_used      += bnzi;
4836     current_space->local_remaining -= bnzi;
4837
4838     bi[i+1] = bi[i] + bnzi;
4839   }
4840
4841   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4842
4843   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4844   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4845   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4846
4847   /* create symbolic parallel matrix B_mpi */
4848   /*---------------------------------------*/
4849   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4850   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4851   if (n==PETSC_DECIDE) {
4852     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4853   } else {
4854     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4855   }
4856   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4857   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4858   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4859   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4860   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4861
4862   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4863   B_mpi->assembled    = PETSC_FALSE;
4864   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4865   merge->bi           = bi;
4866   merge->bj           = bj;
4867   merge->buf_ri       = buf_ri;
4868   merge->buf_rj       = buf_rj;
4869   merge->coi          = PETSC_NULL;
4870   merge->coj          = PETSC_NULL;
4871   merge->owners_co    = PETSC_NULL;
4872
4873   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4874
4875   /* attach the supporting struct to B_mpi for reuse */
4876   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4877   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4878   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4879   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4880   *mpimat = B_mpi;
4881
4882   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4883   PetscFunctionReturn(0);
4884 }
4885
4886 #undef __FUNCT__
4887 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4888 /*@C
4889      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4890                matrices from each processor
4891
4892    Collective on MPI_Comm
4893
4894    Input Parameters:
4895 +    comm - the communicator the parallel matrix will live on
4896 .    seqmat - the input sequential matrix (one per process)
4897 .    m - number of local rows (or PETSC_DECIDE)
4898 .    n - number of local columns (or PETSC_DECIDE)
4899 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4900
4901    Output Parameter:
4902 .    mpimat - the parallel matrix generated
4903
4904     Level: advanced
4905
4906    Notes:
4907      The dimensions of the sequential matrix in each processor MUST be the same.
4908      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4909      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4910 @*/
4911 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4912 {
4913   PetscErrorCode ierr;
4914   PetscMPIInt    size;
4915
4916   PetscFunctionBegin;
4917   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4918   if (size == 1){
4919     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920     if (scall == MAT_INITIAL_MATRIX){
4921       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4922     } else {
4923       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4924     }
4925     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926     PetscFunctionReturn(0);
4927   }
4928   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929   if (scall == MAT_INITIAL_MATRIX){
4930     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4931   }
4932   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4933   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4934   PetscFunctionReturn(0);
4935 }
4936
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4939 /*@
4940      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4941        mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4942        with MatGetSize()
4943
4944     Not Collective
4945
4946    Input Parameters:
4947 +    A - the matrix
4948 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4949
4950    Output Parameter:
4951 .
A_loc - the local sequential matrix generated 4952 4953 Level: developer 4954 4955 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4956 4957 @*/ 4958 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4959 { 4960 PetscErrorCode ierr; 4961 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4962 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4963 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4964 MatScalar *aa=a->a,*ba=b->a,*cam; 4965 PetscScalar *ca; 4966 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4967 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4968 PetscBool match; 4969 4970 PetscFunctionBegin; 4971 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4972 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4973 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4974 if (scall == MAT_INITIAL_MATRIX){ 4975 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4976 ci[0] = 0; 4977 for (i=0; i<am; i++){ 4978 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4979 } 4980 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4981 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4982 k = 0; 4983 for (i=0; i<am; i++) { 4984 ncols_o = bi[i+1] - bi[i]; 4985 ncols_d = ai[i+1] - ai[i]; 4986 /* off-diagonal portion of A */ 4987 for (jo=0; jo<ncols_o; jo++) { 4988 col = cmap[*bj]; 4989 if (col >= cstart) break; 4990 cj[k] = col; bj++; 4991 ca[k++] = *ba++; 4992 } 4993 /* diagonal portion of A */ 4994 for (j=0; j<ncols_d; j++) { 4995 cj[k] = cstart + *aj++; 4996 ca[k++] = *aa++; 4997 } 4998 /* off-diagonal portion of A */ 4999 for (j=jo; j<ncols_o; j++) { 5000 cj[k] = cmap[*bj++]; 5001 ca[k++] = *ba++; 5002 } 5003 } 5004 /* put together the new matrix */ 5005 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5006 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5007 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5008 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5009 mat->free_a = PETSC_TRUE; 5010 mat->free_ij = PETSC_TRUE; 5011 mat->nonew = 0; 5012 } else if (scall == MAT_REUSE_MATRIX){ 5013 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5014 ci = mat->i; cj = mat->j; cam = mat->a; 5015 for (i=0; i<am; i++) { 5016 /* off-diagonal portion of A */ 5017 ncols_o = bi[i+1] - bi[i]; 5018 for (jo=0; jo<ncols_o; jo++) { 5019 col = cmap[*bj]; 5020 if (col >= cstart) break; 5021 *cam++ = *ba++; bj++; 5022 } 5023 /* diagonal portion of A */ 5024 ncols_d = ai[i+1] - ai[i]; 5025 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5026 /* off-diagonal portion of A */ 5027 for (j=jo; j<ncols_o; j++) { 5028 *cam++ = *ba++; bj++; 5029 } 5030 } 5031 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5032 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5033 PetscFunctionReturn(0); 5034 } 5035 5036 #undef __FUNCT__ 5037 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5038 /*@C 5039 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5040 5041 Not Collective 5042 5043 Input Parameters: 5044 + A - the matrix 5045 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5046 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5047 5048 Output Parameter: 5049 . 
A_loc - the local sequential matrix generated 5050 5051 Level: developer 5052 5053 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5054 5055 @*/ 5056 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5057 { 5058 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5059 PetscErrorCode ierr; 5060 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5061 IS isrowa,iscola; 5062 Mat *aloc; 5063 PetscBool match; 5064 5065 PetscFunctionBegin; 5066 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5067 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5068 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5069 if (!row){ 5070 start = A->rmap->rstart; end = A->rmap->rend; 5071 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5072 } else { 5073 isrowa = *row; 5074 } 5075 if (!col){ 5076 start = A->cmap->rstart; 5077 cmap = a->garray; 5078 nzA = a->A->cmap->n; 5079 nzB = a->B->cmap->n; 5080 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5081 ncols = 0; 5082 for (i=0; i<nzB; i++) { 5083 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5084 else break; 5085 } 5086 imark = i; 5087 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5088 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5089 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5090 } else { 5091 iscola = *col; 5092 } 5093 if (scall != MAT_INITIAL_MATRIX){ 5094 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5095 aloc[0] = *A_loc; 5096 } 5097 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5098 *A_loc = aloc[0]; 5099 ierr = PetscFree(aloc);CHKERRQ(ierr); 5100 if (!row){ 5101 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5102 } 5103 if (!col){ 5104 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5105 } 5106 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5107 PetscFunctionReturn(0); 5108 } 5109 5110 #undef __FUNCT__ 5111 #define __FUNCT__ "MatGetBrowsOfAcols" 5112 /*@C 5113 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5114 5115 Collective on Mat 5116 5117 Input Parameters: 5118 + A,B - the matrices in mpiaij format 5119 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5120 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5121 5122 Output Parameter: 5123 + rowb, colb - index sets of rows and columns of B to extract 5124 - B_seq - the sequential matrix generated 5125 5126 Level: developer 5127 5128 @*/ 5129 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5130 { 5131 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5132 PetscErrorCode ierr; 5133 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5134 IS isrowb,iscolb; 5135 Mat *bseq=PETSC_NULL; 5136 5137 PetscFunctionBegin; 5138 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5139 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5140 } 5141 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5142 5143 if (scall == MAT_INITIAL_MATRIX){ 5144 start = A->cmap->rstart; 5145 cmap = a->garray; 5146 nzA = a->A->cmap->n; 5147 nzB = a->B->cmap->n; 5148 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5149 ncols = 0; 5150 for (i=0; i<nzB; i++) { /* row < local row index */ 5151 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5152 else break; 5153 } 5154 imark = i; 5155 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5156 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5157 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5158 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5159 } else { 5160 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5161 isrowb = *rowb; iscolb = *colb; 5162 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5163 bseq[0] = *B_seq; 5164 } 5165 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5166 *B_seq = bseq[0]; 5167 ierr = PetscFree(bseq);CHKERRQ(ierr); 5168 if (!rowb){ 5169 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5170 } else { 5171 *rowb = isrowb; 5172 } 5173 if (!colb){ 5174 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5175 } else { 5176 *colb = iscolb; 5177 } 5178 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5179 PetscFunctionReturn(0); 5180 } 5181 5182 #undef __FUNCT__ 5183 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5184 /* 5185 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5186 of the OFF-DIAGONAL portion of local A 5187 5188 Collective on Mat 5189 5190 Input Parameters: 5191 + A,B - the matrices in mpiaij format 5192 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5193 5194 Output Parameter: 5195 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5196 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5197 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5198 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5199 5200 Level: developer 5201 5202 */ 5203 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5204 { 5205 VecScatter_MPI_General *gen_to,*gen_from; 5206 PetscErrorCode ierr; 5207 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5208 Mat_SeqAIJ *b_oth; 5209 VecScatter ctx=a->Mvctx; 5210 MPI_Comm comm=((PetscObject)ctx)->comm; 5211 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5212 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5213 PetscScalar *rvalues,*svalues; 5214 MatScalar *b_otha,*bufa,*bufA; 5215 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5216 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5217 MPI_Status *sstatus,rstatus; 5218 PetscMPIInt jj; 5219 PetscInt *cols,sbs,rbs; 5220 PetscScalar *vals; 5221 5222 PetscFunctionBegin; 5223 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5224 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5225 } 5226 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5227 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5228 5229 gen_to = (VecScatter_MPI_General*)ctx->todata; 5230 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5231 rvalues = gen_from->values; /* holds the length of receiving row */ 5232 svalues = gen_to->values; /* holds the length of sending row */ 5233 nrecvs = gen_from->n; 5234 nsends = gen_to->n; 5235 5236 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5237 srow = gen_to->indices; /* local row index to be sent */ 5238 sstarts = gen_to->starts; 5239 sprocs = gen_to->procs; 5240 sstatus = gen_to->sstatus; 5241 sbs = gen_to->bs; 5242 rstarts = gen_from->starts; 5243 rprocs = gen_from->procs; 5244 rbs = gen_from->bs; 5245 5246 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5247 if (scall == MAT_INITIAL_MATRIX){ 5248 /* i-array */ 5249 /*---------*/ 5250 /* post receives */ 5251 for (i=0; i<nrecvs; i++){ 5252 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5253 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5254 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5255 } 5256 5257 /* pack the outgoing message */ 5258 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5259 sstartsj[0] = 0; rstartsj[0] = 0; 5260 len = 0; /* total length of j or a array to be sent */ 5261 k = 0; 5262 for (i=0; i<nsends; i++){ 5263 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5264 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5265 for (j=0; j<nrows; j++) { 5266 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5267 for (l=0; l<sbs; l++){ 5268 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5269 rowlen[j*sbs+l] = ncols; 5270 len += ncols; 5271 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5272 } 5273 k++; 5274 } 5275 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5276 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5277 } 5278 /* 
recvs and sends of i-array are completed */ 5279 i = nrecvs; 5280 while (i--) { 5281 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5282 } 5283 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5284 5285 /* allocate buffers for sending j and a arrays */ 5286 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5287 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5288 5289 /* create i-array of B_oth */ 5290 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5291 b_othi[0] = 0; 5292 len = 0; /* total length of j or a array to be received */ 5293 k = 0; 5294 for (i=0; i<nrecvs; i++){ 5295 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5296 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5297 for (j=0; j<nrows; j++) { 5298 b_othi[k+1] = b_othi[k] + rowlen[j]; 5299 len += rowlen[j]; k++; 5300 } 5301 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5302 } 5303 5304 /* allocate space for j and a arrrays of B_oth */ 5305 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5306 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5307 5308 /* j-array */ 5309 /*---------*/ 5310 /* post receives of j-array */ 5311 for (i=0; i<nrecvs; i++){ 5312 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5313 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* pack the outgoing message j-array */ 5317 k = 0; 5318 for (i=0; i<nsends; i++){ 5319 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5320 bufJ = bufj+sstartsj[i]; 5321 for (j=0; j<nrows; j++) { 5322 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5323 for (ll=0; ll<sbs; ll++){ 5324 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5325 for (l=0; l<ncols; l++){ 5326 *bufJ++ = cols[l]; 5327 } 5328 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5329 } 5330 } 5331 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5332 } 5333 5334 /* recvs and sends of j-array are completed */ 5335 i = nrecvs; 5336 while (i--) { 5337 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5338 } 5339 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5340 } else if (scall == MAT_REUSE_MATRIX){ 5341 sstartsj = *startsj_s; 5342 rstartsj = *startsj_r; 5343 bufa = *bufa_ptr; 5344 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5345 b_otha = b_oth->a; 5346 } else { 5347 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5348 } 5349 5350 /* a-array */ 5351 /*---------*/ 5352 /* post receives of a-array */ 5353 for (i=0; i<nrecvs; i++){ 5354 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5355 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5356 } 5357 5358 /* pack the outgoing message a-array */ 5359 k = 0; 5360 for (i=0; i<nsends; i++){ 5361 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5362 bufA = bufa+sstartsj[i]; 5363 for (j=0; j<nrows; j++) { 5364 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5365 for (ll=0; ll<sbs; ll++){ 5366 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5367 for (l=0; l<ncols; l++){ 5368 *bufA++ = vals[l]; 5369 } 5370 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5371 } 5372 } 5373 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5374 } 5375 /* recvs and sends of a-array are completed */ 5376 i = nrecvs; 5377 while (i--) { 5378 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5379 } 5380 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5381 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5382 5383 if (scall == MAT_INITIAL_MATRIX){ 5384 /* put together the new matrix */ 5385 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5386 5387 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5388 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5389 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5390 b_oth->free_a = PETSC_TRUE; 5391 b_oth->free_ij = PETSC_TRUE; 5392 b_oth->nonew = 0; 5393 5394 ierr = PetscFree(bufj);CHKERRQ(ierr); 5395 if (!startsj_s || !bufa_ptr){ 5396 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5397 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5398 } else { 5399 *startsj_s = sstartsj; 5400 *startsj_r = rstartsj; 5401 *bufa_ptr = bufa; 5402 } 5403 } 5404 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5405 PetscFunctionReturn(0); 5406 } 5407 5408 #undef __FUNCT__ 5409 #define __FUNCT__ "MatGetCommunicationStructs" 5410 /*@C 5411 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5412 5413 Not Collective 5414 5415 Input Parameters: 5416 . A - The matrix in mpiaij format 5417 5418 Output Parameter: 5419 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5420 . 
colmap - A map from global column index to local index into lvec 5421 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5422 5423 Level: developer 5424 5425 @*/ 5426 #if defined (PETSC_USE_CTABLE) 5427 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5428 #else 5429 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5430 #endif 5431 { 5432 Mat_MPIAIJ *a; 5433 5434 PetscFunctionBegin; 5435 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5436 PetscValidPointer(lvec, 2); 5437 PetscValidPointer(colmap, 3); 5438 PetscValidPointer(multScatter, 4); 5439 a = (Mat_MPIAIJ *) A->data; 5440 if (lvec) *lvec = a->lvec; 5441 if (colmap) *colmap = a->colmap; 5442 if (multScatter) *multScatter = a->Mvctx; 5443 PetscFunctionReturn(0); 5444 } 5445 5446 EXTERN_C_BEGIN 5447 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5448 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5449 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5450 EXTERN_C_END 5451 5452 #undef __FUNCT__ 5453 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5454 /* 5455 Computes (B'*A')' since computing B*A directly is untenable 5456 5457 n p p 5458 ( ) ( ) ( ) 5459 m ( A ) * n ( B ) = m ( C ) 5460 ( ) ( ) ( ) 5461 5462 */ 5463 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5464 { 5465 PetscErrorCode ierr; 5466 Mat At,Bt,Ct; 5467 5468 PetscFunctionBegin; 5469 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5470 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5471 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5472 ierr = MatDestroy(&At);CHKERRQ(ierr); 5473 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5474 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5475 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5476 PetscFunctionReturn(0); 5477 } 5478 5479 #undef __FUNCT__ 5480 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5481 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5482 { 5483 PetscErrorCode ierr; 5484 PetscInt m=A->rmap->n,n=B->cmap->n; 5485 Mat Cmat; 5486 5487 PetscFunctionBegin; 5488 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5489 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5490 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5491 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5492 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5493 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5494 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5495 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5496 *C = Cmat; 5497 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5498 PetscFunctionReturn(0); 5499 } 5500 5501 /* ----------------------------------------------------------------*/ 5502 #undef __FUNCT__ 5503 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5504 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5505 { 5506 PetscErrorCode ierr; 5507 5508 PetscFunctionBegin; 5509 if (scall == MAT_INITIAL_MATRIX){ 5510 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5511 } 5512 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
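  /* the numeric product is recomputed on every call; the symbolic creation of C above runs only for MAT_INITIAL_MATRIX */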
5513 PetscFunctionReturn(0); 5514 } 5515 5516 EXTERN_C_BEGIN 5517 #if defined(PETSC_HAVE_MUMPS) 5518 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5519 #endif 5520 #if defined(PETSC_HAVE_PASTIX) 5521 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5522 #endif 5523 #if defined(PETSC_HAVE_SUPERLU_DIST) 5524 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5525 #endif 5526 #if defined(PETSC_HAVE_SPOOLES) 5527 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5528 #endif 5529 EXTERN_C_END 5530 5531 /*MC 5532 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5533 5534 Options Database Keys: 5535 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5536 5537 Level: beginner 5538 5539 .seealso: MatCreateAIJ() 5540 M*/ 5541 5542 EXTERN_C_BEGIN 5543 #undef __FUNCT__ 5544 #define __FUNCT__ "MatCreate_MPIAIJ" 5545 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5546 { 5547 Mat_MPIAIJ *b; 5548 PetscErrorCode ierr; 5549 PetscMPIInt size; 5550 5551 PetscFunctionBegin; 5552 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5553 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5554 B->data = (void*)b; 5555 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5556 B->assembled = PETSC_FALSE; 5557 B->insertmode = NOT_SET_VALUES; 5558 b->size = size; 5559 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5560 5561 /* build cache for off array entries formed */ 5562 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5563 b->donotstash = PETSC_FALSE; 5564 b->colmap = 0; 5565 b->garray = 0; 5566 b->roworiented = PETSC_TRUE; 5567 5568 /* stuff used for matrix vector multiply */ 5569 b->lvec = PETSC_NULL; 5570 b->Mvctx = PETSC_NULL; 5571 5572 /* stuff for MatGetRow() */ 5573 b->rowindices = 0; 5574 b->rowvalues = 0; 5575 b->getrowactive = PETSC_FALSE; 5576 5577 /* flexible pointer used in CUSP/CUSPARSE classes */ 5578 b->spptr = PETSC_NULL; 5579 #if defined(PETSC_HAVE_SPOOLES) 5580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5581 "MatGetFactor_mpiaij_spooles", 5582 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5583 #endif 5584 #if defined(PETSC_HAVE_MUMPS) 5585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5586 "MatGetFactor_aij_mumps", 5587 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5588 #endif 5589 #if defined(PETSC_HAVE_PASTIX) 5590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5591 "MatGetFactor_mpiaij_pastix", 5592 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5593 #endif 5594 #if defined(PETSC_HAVE_SUPERLU_DIST) 5595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5596 "MatGetFactor_mpiaij_superlu_dist", 5597 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5598 #endif 5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5600 "MatStoreValues_MPIAIJ", 5601 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5603 "MatRetrieveValues_MPIAIJ", 5604 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5606 "MatGetDiagonalBlock_MPIAIJ", 5607 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5608 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5609 "MatIsTranspose_MPIAIJ", 5610 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5612 "MatMPIAIJSetPreallocation_MPIAIJ", 5613 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5615 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5616 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5618 "MatDiagonalScaleLocal_MPIAIJ", 5619 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5621 "MatConvert_MPIAIJ_MPIAIJPERM", 5622 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5624 "MatConvert_MPIAIJ_MPIAIJCRL", 5625 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5627 "MatConvert_MPIAIJ_MPISBAIJ", 5628 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5630 "MatMatMult_MPIDense_MPIAIJ", 5631 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5633 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5634 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5636 "MatMatMultNumeric_MPIDense_MPIAIJ", 5637 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5639 PetscFunctionReturn(0); 5640 } 5641 EXTERN_C_END 5642 5643 #undef __FUNCT__ 5644 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5645 /*@ 5646 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5647 and "off-diagonal" part of the matrix in CSR format. 5648 5649 Collective on MPI_Comm 5650 5651 Input Parameters: 5652 + comm - MPI communicator 5653 . m - number of local rows (Cannot be PETSC_DECIDE) 5654 . n - This value should be the same as the local size used in creating the 5655 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5656 calculated if N is given) For square matrices n is almost always m. 5657 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5658 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5659 . i - row indices for "diagonal" portion of matrix 5660 . j - column indices 5661 . a - matrix values 5662 . oi - row indices for "off-diagonal" portion of matrix 5663 . oj - column indices 5664 - oa - matrix values 5665 5666 Output Parameter: 5667 . mat - the matrix 5668 5669 Level: advanced 5670 5671 Notes: 5672 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5673 must free the arrays once the matrix has been destroyed and not before. 5674 5675 The i and j indices are 0 based 5676 5677 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5678 5679 This sets local rows and cannot be used to set off-processor values. 
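    Concretely (inferred from the two MatCreateSeqAIJWithArrays() calls below): on a process owning m rows
    and n "diagonal" columns, the i/j/a arrays describe an m by n sequential matrix, so j holds local column
    indices in [0,n), while the oi/oj/oa arrays describe an m by N sequential matrix, so oj holds global
    column indices in [0,N).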
5680 5681 You cannot later use MatSetValues() to change values in this matrix. 5682 5683 .keywords: matrix, aij, compressed row, sparse, parallel 5684 5685 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5686 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5687 @*/ 5688 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5689 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5690 { 5691 PetscErrorCode ierr; 5692 Mat_MPIAIJ *maij; 5693 5694 PetscFunctionBegin; 5695 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5696 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5697 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5698 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5699 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5700 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5701 maij = (Mat_MPIAIJ*) (*mat)->data; 5702 maij->donotstash = PETSC_TRUE; 5703 (*mat)->preallocated = PETSC_TRUE; 5704 5705 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5706 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5707 5708 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5709 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5710 5711 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5712 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5713 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5714 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5715 5716 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5717 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5718 PetscFunctionReturn(0); 5719 } 5720 5721 /* 5722 Special version for direct calls from Fortran 5723 */ 5724 #include <petsc-private/fortranimpl.h> 5725 5726 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5727 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5728 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5729 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5730 #endif 5731 5732 /* Change these macros so can be used in void function */ 5733 #undef CHKERRQ 5734 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5735 #undef SETERRQ2 5736 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5737 #undef SETERRQ3 5738 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5739 #undef SETERRQ 5740 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5741 5742 EXTERN_C_BEGIN 5743 #undef __FUNCT__ 5744 #define __FUNCT__ "matsetvaluesmpiaij_" 5745 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5746 { 5747 Mat mat = *mmat; 5748 PetscInt m = *mm, n = *mn; 5749 InsertMode addv = *maddv; 5750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5751 PetscScalar value; 5752 PetscErrorCode ierr; 5753 5754 MatCheckPreallocated(mat,1); 5755 if (mat->insertmode == NOT_SET_VALUES) { 5756 mat->insertmode = addv; 5757 } 5758 #if defined(PETSC_USE_DEBUG) 5759 else if (mat->insertmode != addv) { 5760 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5761 } 5762 #endif 5763 { 5764 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5765 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5766 PetscBool roworiented = aij->roworiented; 5767 5768 /* Some Variables required in the macro */ 5769 Mat A = aij->A; 5770 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5771 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5772 MatScalar *aa = a->a; 5773 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5774 Mat B = aij->B; 5775 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5776 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5777 MatScalar *ba = b->a; 5778 5779 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5780 PetscInt nonew = a->nonew; 5781 MatScalar *ap1,*ap2; 5782 5783 PetscFunctionBegin; 5784 for (i=0; i<m; i++) { 5785 if (im[i] < 0) continue; 5786 #if defined(PETSC_USE_DEBUG) 5787 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5788 #endif 5789 if (im[i] >= rstart && im[i] < rend) { 5790 row = im[i] - rstart; 5791 lastcol1 = -1; 5792 rp1 = aj + ai[row]; 5793 ap1 = aa + ai[row]; 5794 rmax1 = aimax[row]; 5795 nrow1 = ailen[row]; 5796 low1 = 0; 5797 high1 = nrow1; 5798 lastcol2 = -1; 5799 rp2 = bj + bi[row]; 5800 ap2 = ba + bi[row]; 5801 rmax2 = bimax[row]; 5802 nrow2 = bilen[row]; 5803 low2 = 0; 5804 high2 = nrow2; 5805 5806 for (j=0; j<n; j++) { 5807 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5808 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5809 if (in[j] >= cstart && in[j] < cend){ 5810 col = in[j] - cstart; 5811 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5812 } else if (in[j] < 0) continue; 5813 #if defined(PETSC_USE_DEBUG) 5814 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5815 #endif 5816 else { 5817 if (mat->was_assembled) { 5818 if (!aij->colmap) { 5819 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5820 } 5821 #if defined (PETSC_USE_CTABLE) 5822 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5823 col--; 5824 #else 5825 col = aij->colmap[in[j]] - 1; 5826 #endif 5827 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5828 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5829 col = in[j]; 5830 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5831 B = aij->B; 5832 b = (Mat_SeqAIJ*)B->data; 5833 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5834 rp2 = bj + bi[row]; 5835 ap2 = ba + bi[row]; 5836 rmax2 = bimax[row]; 5837 nrow2 = bilen[row]; 5838 low2 = 0; 5839 high2 = nrow2; 5840 bm = aij->B->rmap->n; 5841 ba = b->a; 5842 } 5843 } else col = in[j]; 5844 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5845 } 5846 } 5847 } else { 5848 if (!aij->donotstash) { 5849 if (roworiented) { 5850 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5851 } else { 5852 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5853 } 5854 } 5855 } 5856 }} 5857 PetscFunctionReturnVoid(); 5858 } 5859 EXTERN_C_END 5860 5861
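/*
   Usage sketch (illustrative only; the user matrices named here are hypothetical): building a parallel
   MPIAIJ matrix by summing per-process SeqAIJ contributions with MatCreateMPIAIJSumSeqAIJ().  Every
   process supplies a sequential matrix of the same global dimensions; entries contributed by more than
   one process are added together, and the communication pattern is reused for MAT_REUSE_MATRIX.

     Mat            seqmat,mpimat;
     PetscErrorCode ierr;
     ...    assemble an M by N MATSEQAIJ contribution in seqmat on each process    ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ...    later, refill seqmat with new values (same nonzero pattern) and update mpimat in place    ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/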