1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc/private/vecimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 12 and MATMPIAIJ otherwise. As a result, for single process communicators, 13 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 19 20 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 21 enough exist. 22 23 Level: beginner 24 25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 26 M*/ 27 28 /*MC 29 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 30 31 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 32 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 33 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 34 for communicators controlling multiple processes. It is recommended that you call both of 35 the above preallocation routines for simplicity. 36 37 Options Database Keys: 38 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 39 40 Level: beginner 41 42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 43 M*/ 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 52 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 53 const PetscInt *ia,*ib; 54 const MatScalar *aa,*bb; 55 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 56 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 57 58 PetscFunctionBegin; 59 *keptrows = 0; 60 ia = a->i; 61 ib = b->i; 62 for (i=0; i<m; i++) { 63 na = ia[i+1] - ia[i]; 64 nb = ib[i+1] - ib[i]; 65 if (!na && !nb) { 66 cnt++; 67 goto ok1; 68 } 69 aa = a->a + ia[i]; 70 for (j=0; j<na; j++) { 71 if (aa[j] != 0.0) goto ok1; 72 } 73 bb = b->a + ib[i]; 74 for (j=0; j <nb; j++) { 75 if (bb[j] != 0.0) goto ok1; 76 } 77 cnt++; 78 ok1:; 79 } 80 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 81 if (!n0rows) PetscFunctionReturn(0); 82 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 83 cnt = 0; 84 for (i=0; i<m; i++) { 85 na = ia[i+1] - ia[i]; 86 nb = ib[i+1] - ib[i]; 87 if (!na && !nb) continue; 88 aa = a->a + ia[i]; 89 for (j=0; j<na;j++) { 90 if (aa[j] != 0.0) { 91 rows[cnt++] = rstart + i; 92 goto ok2; 93 } 94 } 95 bb = b->a + ib[i]; 96 for (j=0; j<nb; j++) { 97 if (bb[j] != 0.0) { 98 rows[cnt++] = rstart + i; 99 goto ok2; 100 } 101 } 102 ok2:; 103 } 104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 110 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 
PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += 
PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 
ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + 
Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 
403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; 
\ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if 
(garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 
nrow1 = ailen[row]; 565 low1 = 0; 566 high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though 
MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) { 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 
#endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all 
these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ 
"MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *lrows; 787 PetscInt r, len; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* get locally owned rows */ 792 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 793 /* fix right hand side if needed */ 794 if (x && b) { 795 const PetscScalar *xx; 796 PetscScalar *bb; 797 798 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 799 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 800 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 801 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 803 } 804 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 806 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 807 PetscBool cong; 808 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 809 if (cong) A->congruentlayouts = 1; 810 else A->congruentlayouts = 0; 811 } 812 if ((diag != 0.0) && A->congruentlayouts) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat 
options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    /* re-insert the diagonal entries that MatZeroRows() above removed */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   MatZeroRowsColumns_MPIAIJ - Zeros the (possibly off-process) rows listed in rows[]
   AND the corresponding columns, placing diag on the zeroed diagonal entries.

   Algorithm:
     1. Build a PetscSF whose leaves are the caller-supplied (global) rows and whose
        roots are locally owned rows, then reduce with MPI_LOR so every process
        learns which of ITS rows must be zeroed (lrows[]).
     2. Zero rows+columns of the diagonal block with MatZeroRowsColumns on l->A.
     3. Build a mask vector over the ghost (off-diagonal) columns via the matrix's
        Mvctx scatter, then walk the off-diagonal block B zeroing masked entries,
        updating b with -a*x contributions when x and b are supplied.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  /* NOTE(review): n holds the local row count here but is reused below as a
     per-row nonzero count inside the zeroing loops — intentional but fragile */
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;  /* -1 marks "row not selected" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): const is cast away on rows; PetscSFReduce only reads the leaf data */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* xmask is 1 on zeroed local rows/cols; scattering it forward yields lmask,
     a per-ghost-column flag telling us which B columns must be zeroed */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* bring ghost values of x local so we can fold a_ij * x_j into b */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];  /* n reused: number of nonzeros in this compressed row */
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   MatMult_MPIAIJ - yy = A*xx, overlapping the ghost-value scatter with the
   local (diagonal-block) multiply: Begin scatter, A*xx, End scatter, yy += B*lvec.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr =
(*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* add the off-diagonal contribution using the freshly scattered ghost values */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
/* MatMultDiagonalBlock_MPIAIJ - xx = (diagonal block of A) * bb; forwards to the local SeqAIJ block */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* MatMultAdd_MPIAIJ - zz = yy + A*xx, same scatter-overlap pattern as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   MatMultTranspose_MPIAIJ - yy = A^T * xx.  B^T*xx produces contributions to
   OTHER processes' rows, so they are accumulated into yy with a REVERSE
   ADD_VALUES scatter; the operation order depends on whether the scatter's
   Begin/End are merged (see VecScatterGetMerged).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   MatIsTranspose_MPIAIJ - Sets *f to PETSC_TRUE if Bmat == Amat^T to within tol.
   Cheap local test first (diagonal blocks transposes of each other), then an
   expensive cross-process test on the off-diagonal coupling.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix.
*/
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global rows NOT owned by this process ([0,first) U [last,M)) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* extract A(Me,Notme) and B(Notme,Me); these must be transposes of each other */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* MatMultTransposeAdd_MPIAIJ - zz = yy + A^T*xx, overlapping the reverse scatter with the local transpose multiply */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
/* MatGetDiagonal_MPIAIJ - v = diag(A); valid only when row and column layouts coincide */
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* MatScale_MPIAIJ - A = aa*A by scaling both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* MatDestroy_MPIAIJ - releases all storage held by the parallel AIJ data structure */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr =
PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1154 #if defined(PETSC_HAVE_ELEMENTAL) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1162 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1163 { 1164 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1165 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1166 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1167 PetscErrorCode ierr; 1168 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1169 int fd; 1170 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1171 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1172 PetscScalar *column_values; 1173 PetscInt message_count,flowcontrolcount; 1174 FILE *file; 1175 1176 PetscFunctionBegin; 1177 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1178 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1179 nz = A->nz + B->nz; 1180 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1181 if (!rank) { 1182 header[0] = MAT_FILE_CLASSID; 1183 header[1] = 
mat->rmap->N; 1184 header[2] = mat->cmap->N; 1185 1186 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1187 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1188 /* get largest number of rows any processor has */ 1189 rlen = mat->rmap->n; 1190 range = mat->rmap->range; 1191 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1192 } else { 1193 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1194 rlen = mat->rmap->n; 1195 } 1196 1197 /* load up the local row counts */ 1198 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1199 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1200 1201 /* store the row lengths to the file */ 1202 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1203 if (!rank) { 1204 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 for (i=1; i<size; i++) { 1206 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1207 rlen = range[i+1] - range[i]; 1208 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1209 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1210 } 1211 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1212 } else { 1213 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1214 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1215 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1216 } 1217 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1218 1219 /* load up the local column indices */ 1220 nzmax = nz; /* th processor needs space a largest processor needs */ 1221 ierr = 
MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1222 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1223 cnt = 0; 1224 for (i=0; i<mat->rmap->n; i++) { 1225 for (j=B->i[i]; j<B->i[i+1]; j++) { 1226 if ((col = garray[B->j[j]]) > cstart) break; 1227 column_indices[cnt++] = col; 1228 } 1229 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1230 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1231 } 1232 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1233 1234 /* store the column indices to the file */ 1235 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1236 if (!rank) { 1237 MPI_Status status; 1238 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1239 for (i=1; i<size; i++) { 1240 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1241 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1242 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1243 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1244 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1245 } 1246 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1247 } else { 1248 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1249 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1250 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1254 1255 
/* load up the local column values */ 1256 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1257 cnt = 0; 1258 for (i=0; i<mat->rmap->n; i++) { 1259 for (j=B->i[i]; j<B->i[i+1]; j++) { 1260 if (garray[B->j[j]] > cstart) break; 1261 column_values[cnt++] = B->a[j]; 1262 } 1263 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1264 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1265 } 1266 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1267 1268 /* store the column values to the file */ 1269 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1270 if (!rank) { 1271 MPI_Status status; 1272 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1273 for (i=1; i<size; i++) { 1274 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1275 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1276 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1277 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1278 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1279 } 1280 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1281 } else { 1282 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1283 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1284 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1286 } 1287 ierr = PetscFree(column_values);CHKERRQ(ierr); 1288 1289 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1290 
if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for MPIAIJ.  Handles
   the ASCII info formats and binary directly; for full ASCII/draw/socket
   output it first assembles the entire matrix onto rank 0 and views that.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    /* temporarily shift local column indices to global numbering, undo below */
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;  /* keep the base pointer; cols is advanced in the loop */
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* MatView_MPIAIJ - top-level viewer dispatch; all supported viewer types funnel into the helper above */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   MatSOR_MPIAIJ - Block Jacobi / local SOR: each outer iteration scatters the
   current xx ghost values, forms bb1 = bb - B*x, and runs the requested local
   sweep on the diagonal block.  SOR_EISENSTAT is handled by a split
   forward/backward trick further below.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 (work rhs) is only needed when more than the single zero-guess sweep will run */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
}

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward half-sweep from a zero guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily cache the diagonal for the pointwise multiply below */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any factorization/zero-pivot error flag from the local block */
  matin->errortype = mat->A->errortype;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - *B = P_row * A * P_col: builds a new parallel AIJ matrix
   whose rows/columns are A's permuted by rowp/colp.  Uses PetscSF reductions to
   invert the (globally distributed) permutations and to translate ghost-column
   numbers, counts the permuted diagonal/off-diagonal nonzeros for exact
   preallocation, then inserts the permuted entries.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat             aA,aB,Aperm;
  const PetscInt  *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar     *aa,*ba;
  PetscInt
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* count permuted diagonal- and off-diagonal-block nonzeros per local row */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that will own the permuted rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never set non-NULL in this function, so this destroy
     never fires — presumably a remnant of handling a parallel colp; confirm upstream */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIAIJ"
/* MatGetGhosts_MPIAIJ - returns the number of ghost (off-process) columns and, optionally, their global indices (garray) */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/* MatGetInfo_MPIAIJ - accumulates MatInfo statistics from both local blocks (continues past this chunk) */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr =
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* combine the statistics of the two sequential blocks */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/* Dispatches each option to the sequential blocks A and B, or records it in the
   parallel matrix itself (stash behavior, symmetry flags, ...) */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      /* SPD implies (structural) symmetry */
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  /* symmetry-related options only affect the diagonal block */
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/* Returns one locally owned row with GLOBAL column indices, merging the diagonal
   (A) and off-diagonal (B) blocks in increasing column order; pair with
   MatRestoreRow_MPIAIJ() */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive)
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request the arrays the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries whose global column precedes the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* imark was not computed above (v == NULL); compute it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* Companion to MatGetRow_MPIAIJ(); only clears the active flag (work arrays are cached) */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/* Computes the Frobenius, 1- (max column sum) or infinity- (max row sum) norm of
   the parallel matrix by combining the two sequential blocks and reducing */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart +
*jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal columns are compressed; garray maps them to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose B = A^T.

   For MAT_INITIAL_MATRIX (or in-place reuse) a new matrix is created with
   preallocation counts computed via a PetscSF reduction; values are then
   inserted one source row at a time with swapped row/column arguments.
   The A-block column indices are temporarily shifted to global numbering
   (aj[i] += cstart) and shifted back afterwards.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* note the swapped local and global sizes: B is N x M */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* insert source row i as column 'row' of B (row/column arguments swapped) */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: fold B's data back into A */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/* Computes mat = diag(ll) * mat * diag(rr); either vector may be NULL */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling is purely local, apply it to the off-diagonal block now */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* Resets the factored state of the diagonal block */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/* Compares the local blocks of two MPIAIJ matrices; all processes must agree
   (logical-AND reduction) for the matrices to be equal */
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    /* only compare the off-diagonal blocks if the diagonal blocks matched */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/* Copies A into B: block-by-block when the nonzero patterns and implementations
   match, otherwise entry-by-entry via MatCopy_Basic() */
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
/* Default setup: preallocate with PETSC_DEFAULT row lengths */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
/*
   Merge-counts, row by row, the union of the column patterns of X and Y given in
   CSR form (xi/xj, yi/yj) with local-to-global column maps xltog/yltog; nnz[i]
   receives the size of the union for row i. Assumes each row's columns are
   sorted by global index.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) { /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++; /* remaining Y-only columns */
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/* Computes Y = a*X + Y. Uses a direct BLAS axpy on the value arrays when the
   patterns match exactly; otherwise falls back to the basic implementations,
   building a freshly preallocated matrix when the patterns are unrelated */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* replace Y's innards with B's, preserving the Y handle for the caller */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/* Complex-conjugates all entries; a no-op in real builds */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
/* Replaces all entries by their real parts, block by block */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
/* Replaces all entries by their imaginary parts, block by block */
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
/* For each locally owned row, v receives the largest-magnitude entry and, if
   idx is non-NULL, idx[] its global column index; combines the diagonal and
   off-diagonal blocks, mapping B's indices through garray */
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      /* shift the diagonal block's local column index to global numbering */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* keep whichever block supplied the larger-magnitude entry */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr =
VecDestroy(&vtmp);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2305 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2306 { 2307 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2308 PetscErrorCode ierr; 2309 PetscInt i,*idxb = 0; 2310 PetscScalar *va,*vb; 2311 Vec vtmp; 2312 2313 PetscFunctionBegin; 2314 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2315 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2316 if (idx) { 2317 for (i=0; i<A->cmap->n; i++) { 2318 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2319 } 2320 } 2321 2322 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2323 if (idx) { 2324 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2325 } 2326 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2327 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2328 2329 for (i=0; i<A->rmap->n; i++) { 2330 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2331 va[i] = vb[i]; 2332 if (idx) idx[i] = a->garray[idxb[i]]; 2333 } 2334 } 2335 2336 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2337 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2338 ierr = PetscFree(idxb);CHKERRQ(ierr); 2339 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2346 { 2347 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2348 PetscInt n = A->rmap->n; 2349 PetscInt cstart = A->cmap->rstart; 2350 PetscInt *cmap = mat->garray; 2351 PetscInt *diagIdx, *offdiagIdx; 2352 Vec diagV, offdiagV; 2353 PetscScalar *a, *diagA, *offdiagA; 2354 PetscInt r; 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2359 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2360 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, 
&offdiagV);CHKERRQ(ierr); 2361 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2362 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2363 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2364 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2365 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2366 for (r = 0; r < n; ++r) { 2367 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2368 a[r] = diagA[r]; 2369 idx[r] = cstart + diagIdx[r]; 2370 } else { 2371 a[r] = offdiagA[r]; 2372 idx[r] = cmap[offdiagIdx[r]]; 2373 } 2374 } 2375 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2376 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2378 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2379 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2380 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2386 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2387 { 2388 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2389 PetscInt n = A->rmap->n; 2390 PetscInt cstart = A->cmap->rstart; 2391 PetscInt *cmap = mat->garray; 2392 PetscInt *diagIdx, *offdiagIdx; 2393 Vec diagV, offdiagV; 2394 PetscScalar *a, *diagA, *offdiagA; 2395 PetscInt r; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2400 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2401 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2402 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2403 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2404 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2405 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2406 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2407 for (r = 0; r < n; ++r) { 2408 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 
a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
/* Gathers the global nonzero structure (no values) as a sequential matrix */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr); /* free only the Mat* array, not the matrix itself */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
/* Inverts the point-block diagonal of the diagonal block; propagates any
   factorization error state back to the parallel matrix */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->errortype = a->A->errortype;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
/* Fills both blocks with random values and reassembles the parallel matrix */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
/* Selects which overlap-increase algorithm this matrix will use */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
/*@
    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

    Collective on Mat

    Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* no-op for matrix types that do not provide the method */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJ"
/* Processes MPIAIJ-specific command-line options */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  /* NOTE(review): PetscOptionsHead is normally paired with PetscOptionsTail and
     PetscObjectOptionsBegin with PetscOptionsEnd; here Head and Begin are both
     issued but only End appears -- confirm the pairing is intentional */
  ierr = PetscObjectOptionsBegin((PetscObject)A);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatShift_MPIAIJ"
/* Adds a*I to Y, first ensuring the diagonal entries can be stored */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode
ierr; 2524 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2525 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2526 2527 PetscFunctionBegin; 2528 if (!Y->preallocated) { 2529 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2530 } else if (!aij->nz) { 2531 PetscInt nonew = aij->nonew; 2532 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2533 aij->nonew = nonew; 2534 } 2535 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2541 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2542 { 2543 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2548 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2549 if (d) { 2550 PetscInt rstart; 2551 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2552 *d += rstart; 2553 2554 } 2555 PetscFunctionReturn(0); 2556 } 2557 2558 2559 /* -------------------------------------------------------------------*/ 2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2561 MatGetRow_MPIAIJ, 2562 MatRestoreRow_MPIAIJ, 2563 MatMult_MPIAIJ, 2564 /* 4*/ MatMultAdd_MPIAIJ, 2565 MatMultTranspose_MPIAIJ, 2566 MatMultTransposeAdd_MPIAIJ, 2567 0, 2568 0, 2569 0, 2570 /*10*/ 0, 2571 0, 2572 0, 2573 MatSOR_MPIAIJ, 2574 MatTranspose_MPIAIJ, 2575 /*15*/ MatGetInfo_MPIAIJ, 2576 MatEqual_MPIAIJ, 2577 MatGetDiagonal_MPIAIJ, 2578 MatDiagonalScale_MPIAIJ, 2579 MatNorm_MPIAIJ, 2580 /*20*/ MatAssemblyBegin_MPIAIJ, 2581 MatAssemblyEnd_MPIAIJ, 2582 MatSetOption_MPIAIJ, 2583 MatZeroEntries_MPIAIJ, 2584 /*24*/ MatZeroRows_MPIAIJ, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*29*/ MatSetUp_MPIAIJ, 2590 0, 2591 0, 2592 MatGetDiagonalBlock_MPIAIJ, 2593 0, 2594 /*34*/ MatDuplicate_MPIAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*39*/ MatAXPY_MPIAIJ, 
2600 MatGetSubMatrices_MPIAIJ, 2601 MatIncreaseOverlap_MPIAIJ, 2602 MatGetValues_MPIAIJ, 2603 MatCopy_MPIAIJ, 2604 /*44*/ MatGetRowMax_MPIAIJ, 2605 MatScale_MPIAIJ, 2606 MatShift_MPIAIJ, 2607 MatDiagonalSet_MPIAIJ, 2608 MatZeroRowsColumns_MPIAIJ, 2609 /*49*/ MatSetRandom_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*54*/ MatFDColoringCreate_MPIXAIJ, 2615 0, 2616 MatSetUnfactored_MPIAIJ, 2617 MatPermute_MPIAIJ, 2618 0, 2619 /*59*/ MatGetSubMatrix_MPIAIJ, 2620 MatDestroy_MPIAIJ, 2621 MatView_MPIAIJ, 2622 0, 2623 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2624 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2625 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2630 MatGetRowMinAbs_MPIAIJ, 2631 0, 2632 MatSetColoring_MPIAIJ, 2633 0, 2634 MatSetValuesAdifor_MPIAIJ, 2635 /*75*/ MatFDColoringApply_AIJ, 2636 MatSetFromOptions_MPIAIJ, 2637 0, 2638 0, 2639 MatFindZeroDiagonals_MPIAIJ, 2640 /*80*/ 0, 2641 0, 2642 0, 2643 /*83*/ MatLoad_MPIAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2650 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2651 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2652 MatPtAP_MPIAIJ_MPIAIJ, 2653 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2654 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*99*/ 0, 2660 0, 2661 0, 2662 MatConjugate_MPIAIJ, 2663 0, 2664 /*104*/MatSetValuesRow_MPIAIJ, 2665 MatRealPart_MPIAIJ, 2666 MatImaginaryPart_MPIAIJ, 2667 0, 2668 0, 2669 /*109*/0, 2670 0, 2671 MatGetRowMin_MPIAIJ, 2672 0, 2673 MatMissingDiagonal_MPIAIJ, 2674 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2675 0, 2676 MatGetGhosts_MPIAIJ, 2677 0, 2678 0, 2679 /*119*/0, 2680 0, 2681 0, 2682 0, 2683 MatGetMultiProcBlock_MPIAIJ, 2684 /*124*/MatFindNonzeroRows_MPIAIJ, 2685 MatGetColumnNorms_MPIAIJ, 2686 MatInvertBlockDiagonal_MPIAIJ, 2687 0, 2688 MatGetSubMatricesMPI_MPIAIJ, 2689 /*129*/0, 2690 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2691 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2692 
MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                               /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                               /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/*
   Saves a copy of the current numerical values of both sequential parts
   (diagonal A and off-diagonal B) so they can later be restored with
   MatRetrieveValues_MPIAIJ().
*/
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/*
   Restores the numerical values of both sequential parts that were previously
   saved by MatStoreValues_MPIAIJ().
*/
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Implementation behind MatMPIAIJSetPreallocation(): sets up the row/column
   layouts and preallocates the two sequential submatrices.  d_nz/d_nnz size the
   diagonal block, o_nz/o_nnz the off-diagonal block (see the public man page).
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices.
*/ 2749 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2750 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2751 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2752 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2753 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2754 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2755 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2756 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2757 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2758 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2759 } 2760 2761 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2762 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2763 B->preallocated = PETSC_TRUE; 2764 PetscFunctionReturn(0); 2765 } 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2769 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2770 { 2771 Mat mat; 2772 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2773 PetscErrorCode ierr; 2774 2775 PetscFunctionBegin; 2776 *newmat = 0; 2777 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2778 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2779 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2780 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2781 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2782 a = (Mat_MPIAIJ*)mat->data; 2783 2784 mat->factortype = matin->factortype; 2785 mat->assembled = PETSC_TRUE; 2786 mat->insertmode = NOT_SET_VALUES; 2787 mat->preallocated = PETSC_TRUE; 2788 2789 a->size = oldmat->size; 2790 a->rank = oldmat->rank; 2791 a->donotstash = oldmat->donotstash; 2792 a->roworiented = oldmat->roworiented; 2793 a->rowindices 
= 0; 2794 a->rowvalues = 0; 2795 a->getrowactive = PETSC_FALSE; 2796 2797 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2798 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2799 2800 if (oldmat->colmap) { 2801 #if defined(PETSC_USE_CTABLE) 2802 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2803 #else 2804 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2805 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2806 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2807 #endif 2808 } else a->colmap = 0; 2809 if (oldmat->garray) { 2810 PetscInt len; 2811 len = oldmat->B->cmap->n; 2812 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2813 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2814 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2815 } else a->garray = 0; 2816 2817 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2818 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2819 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2820 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2821 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2822 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2823 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2824 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2825 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2826 *newmat = mat; 2827 PetscFunctionReturn(0); 2828 } 2829 2830 2831 2832 #undef __FUNCT__ 2833 #define __FUNCT__ "MatLoad_MPIAIJ" 2834 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2835 { 2836 PetscScalar *vals,*svals; 2837 MPI_Comm comm; 2838 
PetscErrorCode ierr; 2839 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2840 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2841 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2842 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2843 PetscInt cend,cstart,n,*rowners; 2844 int fd; 2845 PetscInt bs = newMat->rmap->bs; 2846 2847 PetscFunctionBegin; 2848 /* force binary viewer to load .info file if it has not yet done so */ 2849 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2850 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2851 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2852 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2853 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2854 if (!rank) { 2855 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2856 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2857 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MPIAIJ"); 2858 } 2859 2860 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2861 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2862 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2863 if (bs < 0) bs = 1; 2864 2865 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2866 M = header[1]; N = header[2]; 2867 2868 /* If global sizes are set, check if they are consistent with that given in the file */ 2869 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2870 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and 
input matrix has (%D)",newMat->cmap->N,N); 2871 2872 /* determine ownership of all (block) rows */ 2873 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2874 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2875 else m = newMat->rmap->n; /* Set by user */ 2876 2877 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2878 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2879 2880 /* First process needs enough room for process with most rows */ 2881 if (!rank) { 2882 mmax = rowners[1]; 2883 for (i=2; i<=size; i++) { 2884 mmax = PetscMax(mmax, rowners[i]); 2885 } 2886 } else mmax = -1; /* unused, but compilers complain */ 2887 2888 rowners[0] = 0; 2889 for (i=2; i<=size; i++) { 2890 rowners[i] += rowners[i-1]; 2891 } 2892 rstart = rowners[rank]; 2893 rend = rowners[rank+1]; 2894 2895 /* distribute row lengths to all processors */ 2896 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2897 if (!rank) { 2898 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2899 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2900 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2901 for (j=0; j<m; j++) { 2902 procsnz[0] += ourlens[j]; 2903 } 2904 for (i=1; i<size; i++) { 2905 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2906 /* calculate the number of nonzeros on each processor */ 2907 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2908 procsnz[i] += rowlengths[j]; 2909 } 2910 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2911 } 2912 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2913 } else { 2914 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2915 } 2916 2917 if (!rank) { 2918 /* determine max buffer needed and allocate it */ 2919 maxnz = 0; 2920 for (i=0; i<size; i++) { 2921 maxnz = PetscMax(maxnz,procsnz[i]); 2922 } 
2923 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2924 2925 /* read in my part of the matrix column indices */ 2926 nz = procsnz[0]; 2927 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2928 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2929 2930 /* read in every one elses and ship off */ 2931 for (i=1; i<size; i++) { 2932 nz = procsnz[i]; 2933 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2934 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2935 } 2936 ierr = PetscFree(cols);CHKERRQ(ierr); 2937 } else { 2938 /* determine buffer space needed for message */ 2939 nz = 0; 2940 for (i=0; i<m; i++) { 2941 nz += ourlens[i]; 2942 } 2943 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2944 2945 /* receive message of column indices*/ 2946 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2947 } 2948 2949 /* determine column ownership if matrix is not square */ 2950 if (N != M) { 2951 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2952 else n = newMat->cmap->n; 2953 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2954 cstart = cend - n; 2955 } else { 2956 cstart = rstart; 2957 cend = rend; 2958 n = cend - cstart; 2959 } 2960 2961 /* loop over local rows, determining number of off diagonal entries */ 2962 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2963 jj = 0; 2964 for (i=0; i<m; i++) { 2965 for (j=0; j<ourlens[i]; j++) { 2966 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2967 jj++; 2968 } 2969 } 2970 2971 for (i=0; i<m; i++) { 2972 ourlens[i] -= offlens[i]; 2973 } 2974 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2975 2976 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2977 2978 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2979 2980 for (i=0; i<m; i++) { 2981 ourlens[i] += offlens[i]; 2982 } 2983 2984 if (!rank) { 2985 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2986 2987 /* read in my part 
of the matrix numerical values */ 2988 nz = procsnz[0]; 2989 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2990 2991 /* insert into matrix */ 2992 jj = rstart; 2993 smycols = mycols; 2994 svals = vals; 2995 for (i=0; i<m; i++) { 2996 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2997 smycols += ourlens[i]; 2998 svals += ourlens[i]; 2999 jj++; 3000 } 3001 3002 /* read in other processors and ship out */ 3003 for (i=1; i<size; i++) { 3004 nz = procsnz[i]; 3005 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3006 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3007 } 3008 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3009 } else { 3010 /* receive numeric values */ 3011 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3012 3013 /* receive message of values*/ 3014 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3015 3016 /* insert into matrix */ 3017 jj = rstart; 3018 smycols = mycols; 3019 svals = vals; 3020 for (i=0; i<m; i++) { 3021 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3022 smycols += ourlens[i]; 3023 svals += ourlens[i]; 3024 jj++; 3025 } 3026 } 3027 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3028 ierr = PetscFree(vals);CHKERRQ(ierr); 3029 ierr = PetscFree(mycols);CHKERRQ(ierr); 3030 ierr = PetscFree(rowners);CHKERRQ(ierr); 3031 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035 3036 #undef __FUNCT__ 3037 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3038 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/ 3039 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3040 { 3041 PetscErrorCode ierr; 3042 IS iscol_local; 3043 PetscInt csize; 3044 3045 PetscFunctionBegin; 3046 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3047 if (call == MAT_REUSE_MATRIX) { 3048 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3049 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3050 } else { 3051 /* check if we are grabbing all columns*/ 3052 PetscBool isstride; 3053 PetscMPIInt lisstride = 0,gisstride; 3054 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3055 if (isstride) { 3056 PetscInt start,len,mstart,mlen; 3057 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3058 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3059 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3060 if (mstart == start && mlen-mstart == len) lisstride = 1; 3061 } 3062 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3063 if (gisstride) { 3064 PetscInt N; 3065 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3066 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3067 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3068 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3069 } else { 3070 PetscInt cbs; 3071 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3072 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3073 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3074 } 3075 } 3076 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3077 if (call == MAT_INITIAL_MATRIX) { 3078 ierr = 
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3079 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3080 } 3081 PetscFunctionReturn(0); 3082 } 3083 3084 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3087 /* 3088 Not great since it makes two copies of the submatrix, first an SeqAIJ 3089 in local and then by concatenating the local matrices the end result. 3090 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3091 3092 Note: This requires a sequential iscol with all indices. 3093 */ 3094 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3095 { 3096 PetscErrorCode ierr; 3097 PetscMPIInt rank,size; 3098 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3099 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3100 PetscBool allcolumns, colflag; 3101 Mat M,Mreuse; 3102 MatScalar *vwork,*aa; 3103 MPI_Comm comm; 3104 Mat_SeqAIJ *aij; 3105 3106 PetscFunctionBegin; 3107 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3108 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3109 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3110 3111 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3112 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3113 if (colflag && ncol == mat->cmap->N) { 3114 allcolumns = PETSC_TRUE; 3115 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3116 } else { 3117 allcolumns = PETSC_FALSE; 3118 } 3119 if (call == MAT_REUSE_MATRIX) { 3120 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3121 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3122 ierr = 
MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3123 } else { 3124 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3125 } 3126 3127 /* 3128 m - number of local rows 3129 n - number of columns (same on all processors) 3130 rstart - first row in new global matrix generated 3131 */ 3132 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3133 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3134 if (call == MAT_INITIAL_MATRIX) { 3135 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3136 ii = aij->i; 3137 jj = aij->j; 3138 3139 /* 3140 Determine the number of non-zeros in the diagonal and off-diagonal 3141 portions of the matrix in order to do correct preallocation 3142 */ 3143 3144 /* first get start and end of "diagonal" columns */ 3145 if (csize == PETSC_DECIDE) { 3146 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3147 if (mglobal == n) { /* square matrix */ 3148 nlocal = m; 3149 } else { 3150 nlocal = n/size + ((n % size) > rank); 3151 } 3152 } else { 3153 nlocal = csize; 3154 } 3155 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3156 rstart = rend - nlocal; 3157 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3158 3159 /* next, compute all the lengths */ 3160 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3161 olens = dlens + m; 3162 for (i=0; i<m; i++) { 3163 jend = ii[i+1] - ii[i]; 3164 olen = 0; 3165 dlen = 0; 3166 for (j=0; j<jend; j++) { 3167 if (*jj < rstart || *jj >= rend) olen++; 3168 else dlen++; 3169 jj++; 3170 } 3171 olens[i] = olen; 3172 dlens[i] = dlen; 3173 } 3174 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3175 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3176 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3177 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3178 ierr = 
MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3179 ierr = PetscFree(dlens);CHKERRQ(ierr); 3180 } else { 3181 PetscInt ml,nl; 3182 3183 M = *newmat; 3184 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3185 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3186 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3187 /* 3188 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3189 rather than the slower MatSetValues(). 3190 */ 3191 M->was_assembled = PETSC_TRUE; 3192 M->assembled = PETSC_FALSE; 3193 } 3194 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3195 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3196 ii = aij->i; 3197 jj = aij->j; 3198 aa = aij->a; 3199 for (i=0; i<m; i++) { 3200 row = rstart + i; 3201 nz = ii[i+1] - ii[i]; 3202 cwork = jj; jj += nz; 3203 vwork = aa; aa += nz; 3204 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3205 } 3206 3207 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3208 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 *newmat = M; 3210 3211 /* save submatrix used in processor for next request */ 3212 if (call == MAT_INITIAL_MATRIX) { 3213 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3214 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3215 } 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3221 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3222 { 3223 PetscInt m,cstart, cend,j,nnz,i,d; 3224 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3225 const PetscInt *JJ; 3226 PetscScalar *values; 3227 PetscErrorCode ierr; 3228 3229 PetscFunctionBegin; 3230 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3231 3232 ierr = 
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3234 m = B->rmap->n; 3235 cstart = B->cmap->rstart; 3236 cend = B->cmap->rend; 3237 rstart = B->rmap->rstart; 3238 3239 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3240 3241 #if defined(PETSC_USE_DEBUGGING) 3242 for (i=0; i<m; i++) { 3243 nnz = Ii[i+1]- Ii[i]; 3244 JJ = J + Ii[i]; 3245 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3246 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3247 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3248 } 3249 #endif 3250 3251 for (i=0; i<m; i++) { 3252 nnz = Ii[i+1]- Ii[i]; 3253 JJ = J + Ii[i]; 3254 nnz_max = PetscMax(nnz_max,nnz); 3255 d = 0; 3256 for (j=0; j<nnz; j++) { 3257 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3258 } 3259 d_nnz[i] = d; 3260 o_nnz[i] = nnz - d; 3261 } 3262 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3263 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3264 3265 if (v) values = (PetscScalar*)v; 3266 else { 3267 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3268 } 3269 3270 for (i=0; i<m; i++) { 3271 ii = i + rstart; 3272 nnz = Ii[i+1]- Ii[i]; 3273 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3274 } 3275 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3276 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3277 3278 if (!v) { 3279 ierr = PetscFree(values);CHKERRQ(ierr); 3280 } 3281 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3282 PetscFunctionReturn(0); 3283 } 3284 3285 #undef __FUNCT__ 3286 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3287 /*@ 3288 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3289 (the default parallel PETSc format). 3290 3291 Collective on MPI_Comm 3292 3293 Input Parameters: 3294 + B - the matrix 3295 . i - the indices into j for the start of each local row (starts with zero) 3296 . j - the column indices for each local row (starts with zero) 3297 - v - optional values in the matrix 3298 3299 Level: developer 3300 3301 Notes: 3302 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3303 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3304 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3305 3306 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3307 3308 The format which is used for the sparse matrix input, is equivalent to a 3309 row-major ordering.. 
i.e for the following matrix, the input data expected is 3310 as shown 3311 3312 $ 1 0 0 3313 $ 2 0 3 P0 3314 $ ------- 3315 $ 4 5 6 P1 3316 $ 3317 $ Process0 [P0]: rows_owned=[0,1] 3318 $ i = {0,1,3} [size = nrow+1 = 2+1] 3319 $ j = {0,0,2} [size = 3] 3320 $ v = {1,2,3} [size = 3] 3321 $ 3322 $ Process1 [P1]: rows_owned=[2] 3323 $ i = {0,3} [size = nrow+1 = 1+1] 3324 $ j = {0,1,2} [size = 3] 3325 $ v = {4,5,6} [size = 3] 3326 3327 .keywords: matrix, aij, compressed row, sparse, parallel 3328 3329 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3330 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3331 @*/ 3332 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3333 { 3334 PetscErrorCode ierr; 3335 3336 PetscFunctionBegin; 3337 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3338 PetscFunctionReturn(0); 3339 } 3340 3341 #undef __FUNCT__ 3342 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3343 /*@C 3344 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3345 (the default parallel PETSc format). For good matrix assembly performance 3346 the user should preallocate the matrix storage by setting the parameters 3347 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3348 performance can be increased by more than a factor of 50. 3349 3350 Collective on MPI_Comm 3351 3352 Input Parameters: 3353 + B - the matrix 3354 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3355 (same value is used for all local rows) 3356 . 
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.
In the 3387 common case of a square matrix, the row and column ranges are the same and 3388 the DIAGONAL part is also square. The remaining portion of the local 3389 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3390 3391 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3392 3393 You can call MatGetInfo() to get information on how effective the preallocation was; 3394 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3395 You can also run with the option -info and look for messages with the string 3396 malloc in them to see if additional memory allocation was needed. 3397 3398 Example usage: 3399 3400 Consider the following 8x8 matrix with 34 non-zero values, that is 3401 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3402 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3403 as follows: 3404 3405 .vb 3406 1 2 0 | 0 3 0 | 0 4 3407 Proc0 0 5 6 | 7 0 0 | 8 0 3408 9 0 10 | 11 0 0 | 12 0 3409 ------------------------------------- 3410 13 0 14 | 15 16 17 | 0 0 3411 Proc1 0 18 0 | 19 20 21 | 0 0 3412 0 0 0 | 22 23 0 | 24 0 3413 ------------------------------------- 3414 Proc2 25 26 27 | 0 0 28 | 29 0 3415 30 0 0 | 31 32 33 | 0 34 3416 .ve 3417 3418 This can be represented as a collection of submatrices as: 3419 3420 .vb 3421 A B C 3422 D E F 3423 G H I 3424 .ve 3425 3426 Where the submatrices A,B,C are owned by proc0, D,E,F are 3427 owned by proc1, G,H,I are owned by proc2. 3428 3429 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3430 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3431 The 'M','N' parameters are 8,8, and have the same values on all procs. 3432 3433 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3434 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3435 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3436 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3437 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3438 matrix, ans [DF] as another SeqAIJ matrix. 3439 3440 When d_nz, o_nz parameters are specified, d_nz storage elements are 3441 allocated for every row of the local diagonal submatrix, and o_nz 3442 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3443 One way to choose d_nz and o_nz is to use the max nonzerors per local 3444 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3445 In this case, the values of d_nz,o_nz are: 3446 .vb 3447 proc0 : dnz = 2, o_nz = 2 3448 proc1 : dnz = 3, o_nz = 2 3449 proc2 : dnz = 1, o_nz = 4 3450 .ve 3451 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3452 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3453 for proc3. i.e we are using 12+15+10=37 storage locations to store 3454 34 values. 3455 3456 When d_nnz, o_nnz parameters are specified, the storage is specified 3457 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3458 In the above case the values for d_nnz,o_nnz are: 3459 .vb 3460 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3461 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3462 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3463 .ve 3464 Here the space allocated is sum of all the above values i.e 34, and 3465 hence pre-allocation is perfect. 
3466 3467 Level: intermediate 3468 3469 .keywords: matrix, aij, compressed row, sparse, parallel 3470 3471 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3472 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3473 @*/ 3474 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3475 { 3476 PetscErrorCode ierr; 3477 3478 PetscFunctionBegin; 3479 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3480 PetscValidType(B,1); 3481 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3482 PetscFunctionReturn(0); 3483 } 3484 3485 #undef __FUNCT__ 3486 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3487 /*@ 3488 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3489 CSR format the local rows. 3490 3491 Collective on MPI_Comm 3492 3493 Input Parameters: 3494 + comm - MPI communicator 3495 . m - number of local rows (Cannot be PETSC_DECIDE) 3496 . n - This value should be the same as the local size used in creating the 3497 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3498 calculated if N is given) For square matrices n is almost always m. 3499 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3500 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3501 . i - row indices 3502 . j - column indices 3503 - a - matrix values 3504 3505 Output Parameter: 3506 . mat - the matrix 3507 3508 Level: intermediate 3509 3510 Notes: 3511 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3512 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3513 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$        -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* CSR row pointers must be zero-based, and the local row count must be explicit. */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* On a single-process communicator a SEQAIJ matrix is created instead of MPIAIJ. */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/* MatMPIAIJGetSeqAIJ - returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks
   of an MPIAIJ matrix, and the global column numbers (colmap) of the off-diagonal
   block's columns.  Any output argument may be NULL if not wanted. */
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input");
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* MatSetColoring_MPIAIJ - applies an ISColoring to both the diagonal (a->A) and
   off-diagonal (a->B) SeqAIJ blocks.  For a global coloring, all colors are gathered
   and the off-diagonal block's colors are looked up through a->garray; for a local
   coloring, global column numbers are first mapped to local indices. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* MatSetValuesAdifor_MPIAIJ - forwards ADIFOR-computed values to both the
   diagonal and off-diagonal SeqAIJ blocks. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
/* MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks each process's sequential matrix
   (row-wise) into one parallel MPIAIJ matrix; n is the local column count or
   PETSC_DECIDE.  MAT_INITIAL_MATRIX runs a symbolic (preallocation) phase first. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
/* symbolic phase */ 3852 PetscInt *dnz,*onz,sum,bs,cbs; 3853 3854 if (n == PETSC_DECIDE) { 3855 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3856 } 3857 /* Check sum(n) = N */ 3858 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3859 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3860 3861 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3862 rstart -= m; 3863 3864 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3865 for (i=0; i<m; i++) { 3866 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3867 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3868 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3869 } 3870 3871 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3872 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3873 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3874 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3875 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3876 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3877 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3878 } 3879 3880 /* numeric phase */ 3881 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3882 for (i=0; i<m; i++) { 3883 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3884 Ii = i + rstart; 3885 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3886 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3887 } 3888 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 PetscFunctionReturn(0); 3891 } 3892 3893 #undef __FUNCT__ 3894 #define __FUNCT__ "MatFileSplit" 3895 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3896 { 3897 PetscErrorCode ierr; 3898 PetscMPIInt rank; 3899 
PetscInt m,N,i,rstart,nnz; 3900 size_t len; 3901 const PetscInt *indx; 3902 PetscViewer out; 3903 char *name; 3904 Mat B; 3905 const PetscScalar *values; 3906 3907 PetscFunctionBegin; 3908 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3909 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3910 /* Should this be the type of the diagonal block of A? */ 3911 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3912 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3913 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3914 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3915 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3916 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3917 for (i=0; i<m; i++) { 3918 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3919 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3920 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3921 } 3922 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3923 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3924 3925 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3926 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3927 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3928 sprintf(name,"%s.%d",outfile,rank); 3929 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3930 ierr = PetscFree(name);CHKERRQ(ierr); 3931 ierr = MatView(B,out);CHKERRQ(ierr); 3932 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3933 ierr = MatDestroy(&B);CHKERRQ(ierr); 3934 PetscFunctionReturn(0); 3935 } 3936 3937 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3938 #undef __FUNCT__ 3939 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3940 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3941 { 3942 PetscErrorCode ierr; 3943 Mat_Merge_SeqsToMPI *merge; 3944 PetscContainer container; 3945 3946 PetscFunctionBegin; 3947 ierr = 
PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
/* MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of summing per-process SeqAIJ
   matrices into mpimat; uses the merge (i,j) structure attached to mpimat by the
   symbolic phase, exchanging off-process values with nonblocking sends/receives. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm;
  Mat_SeqAIJ           *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a;
  MatScalar            **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge structure created by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ
*a=(Mat_SeqAIJ*)seqmat->data; 4101 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4102 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4103 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4104 PetscInt len,proc,*dnz,*onz,bs,cbs; 4105 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4106 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4107 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4108 MPI_Status *status; 4109 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4110 PetscBT lnkbt; 4111 Mat_Merge_SeqsToMPI *merge; 4112 PetscContainer container; 4113 4114 PetscFunctionBegin; 4115 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4116 4117 /* make sure it is a PETSc comm */ 4118 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4119 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4120 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4121 4122 ierr = PetscNew(&merge);CHKERRQ(ierr); 4123 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4124 4125 /* determine row ownership */ 4126 /*---------------------------------------------------------*/ 4127 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4128 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4129 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4130 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4131 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4132 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4133 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4134 4135 m = merge->rowmap->n; 4136 owners = merge->rowmap->range; 4137 4138 /* determine the number of messages to send, their lengths */ 4139 /*---------------------------------------------------------*/ 4140 len_s = merge->len_s; 4141 4142 len = 0; /* length of buf_si[] */ 4143 merge->nsend = 0; 4144 for (proc=0; proc<size; proc++) { 4145 len_si[proc] = 0; 4146 if (proc == rank) { 4147 len_s[proc] = 0; 4148 } 
else { 4149 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4150 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4151 } 4152 if (len_s[proc]) { 4153 merge->nsend++; 4154 nrows = 0; 4155 for (i=owners[proc]; i<owners[proc+1]; i++) { 4156 if (ai[i+1] > ai[i]) nrows++; 4157 } 4158 len_si[proc] = 2*(nrows+1); 4159 len += len_si[proc]; 4160 } 4161 } 4162 4163 /* determine the number and length of messages to receive for ij-structure */ 4164 /*-------------------------------------------------------------------------*/ 4165 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4166 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4167 4168 /* post the Irecv of j-structure */ 4169 /*-------------------------------*/ 4170 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4171 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4172 4173 /* post the Isend of j-structure */ 4174 /*--------------------------------*/ 4175 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4176 4177 for (proc=0, k=0; proc<size; proc++) { 4178 if (!len_s[proc]) continue; 4179 i = owners[proc]; 4180 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4181 k++; 4182 } 4183 4184 /* receives and sends of j-structure are complete */ 4185 /*------------------------------------------------*/ 4186 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4187 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4188 4189 /* send and recv i-structure */ 4190 /*---------------------------*/ 4191 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4192 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4193 4194 ierr = 
PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4195 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4196 for (proc=0,k=0; proc<size; proc++) { 4197 if (!len_s[proc]) continue; 4198 /* form outgoing message for i-structure: 4199 buf_si[0]: nrows to be sent 4200 [1:nrows]: row index (global) 4201 [nrows+1:2*nrows+1]: i-structure index 4202 */ 4203 /*-------------------------------------------*/ 4204 nrows = len_si[proc]/2 - 1; 4205 buf_si_i = buf_si + nrows+1; 4206 buf_si[0] = nrows; 4207 buf_si_i[0] = 0; 4208 nrows = 0; 4209 for (i=owners[proc]; i<owners[proc+1]; i++) { 4210 anzi = ai[i+1] - ai[i]; 4211 if (anzi) { 4212 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4213 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4214 nrows++; 4215 } 4216 } 4217 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4218 k++; 4219 buf_si += len_si[proc]; 4220 } 4221 4222 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4223 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4224 4225 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4226 for (i=0; i<merge->nrecv; i++) { 4227 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4228 } 4229 4230 ierr = PetscFree(len_si);CHKERRQ(ierr); 4231 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4232 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4233 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4234 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4235 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4236 ierr = PetscFree(status);CHKERRQ(ierr); 4237 4238 /* compute a local seq matrix in each processor */ 4239 /*----------------------------------------------*/ 4240 /* allocate bi array and free space for accumulating nonzero column info */ 4241 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4242 bi[0] = 0; 4243 4244 
/* create and initialize a linked list */ 4245 nlnk = N+1; 4246 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4247 4248 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4249 len = ai[owners[rank+1]] - ai[owners[rank]]; 4250 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4251 4252 current_space = free_space; 4253 4254 /* determine symbolic info for each local row */ 4255 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4256 4257 for (k=0; k<merge->nrecv; k++) { 4258 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4259 nrows = *buf_ri_k[k]; 4260 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4261 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4262 } 4263 4264 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4265 len = 0; 4266 for (i=0; i<m; i++) { 4267 bnzi = 0; 4268 /* add local non-zero cols of this proc's seqmat into lnk */ 4269 arow = owners[rank] + i; 4270 anzi = ai[arow+1] - ai[arow]; 4271 aj = a->j + ai[arow]; 4272 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4273 bnzi += nlnk; 4274 /* add received col data into lnk */ 4275 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4276 if (i == *nextrow[k]) { /* i-th row */ 4277 anzi = *(nextai[k]+1) - *nextai[k]; 4278 aj = buf_rj[k] + *nextai[k]; 4279 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4280 bnzi += nlnk; 4281 nextrow[k]++; nextai[k]++; 4282 } 4283 } 4284 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4285 4286 /* if free space is not available, make more free space */ 4287 if (current_space->local_remaining<bnzi) { 4288 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4289 nspacedouble++; 4290 } 4291 /* copy data into free space, then initialize lnk */ 4292 ierr = 
PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4293 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4294 4295 current_space->array += bnzi; 4296 current_space->local_used += bnzi; 4297 current_space->local_remaining -= bnzi; 4298 4299 bi[i+1] = bi[i] + bnzi; 4300 } 4301 4302 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4303 4304 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4305 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4306 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4307 4308 /* create symbolic parallel matrix B_mpi */ 4309 /*---------------------------------------*/ 4310 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4311 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4312 if (n==PETSC_DECIDE) { 4313 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4314 } else { 4315 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4316 } 4317 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4318 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4319 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4320 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4321 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4322 4323 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4324 B_mpi->assembled = PETSC_FALSE; 4325 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4326 merge->bi = bi; 4327 merge->bj = bj; 4328 merge->buf_ri = buf_ri; 4329 merge->buf_rj = buf_rj; 4330 merge->coi = NULL; 4331 merge->coj = NULL; 4332 merge->owners_co = NULL; 4333 4334 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4335 4336 /* attach the supporting struct to B_mpi for reuse */ 4337 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4338 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4339 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4340 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4341 *mpimat = B_mpi; 4342 4343 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4349 /*@C 4350 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4351 matrices from each processor 4352 4353 Collective on MPI_Comm 4354 4355 Input Parameters: 4356 + comm - the communicators the parallel matrix will live on 4357 . seqmat - the input sequential matrices 4358 . m - number of local rows (or PETSC_DECIDE) 4359 . n - number of local columns (or PETSC_DECIDE) 4360 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4361 4362 Output Parameter: 4363 . mpimat - the parallel matrix generated 4364 4365 Level: advanced 4366 4367 Notes: 4368 The dimensions of the sequential matrix in each processor MUST be the same. 4369 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4370 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    /* uniprocessor: the "sum" degenerates to a copy of the sequential matrix */
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  /* symbolic phase (parallel nonzero structure) only on the first call;
     numeric phase runs every call, reusing the merge context attached to *mpimat */
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
    MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
   A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  /* a = diagonal block (local columns), b = off-diagonal block (compressed columns, cmap maps to global) */
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      /* one process: the diagonal block already IS the whole local matrix */
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    /* row i of the merged matrix has all entries of row i of both blocks */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    /* merge per row so global column indices stay sorted:
       off-diagonal cols < cstart, then diagonal cols (shifted by cstart), then off-diagonal cols >= cstart */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* pattern already exists in *A_loc; overwrite only the numerical values, same traversal order */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
    MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.
   A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default row set: all locally-owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the global columns with local nonzeros, in ascending order:
       garray entries < cstart, then the local (diagonal) columns, then garray entries >= cstart */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    /* idx ownership transfers to the IS (PETSC_OWN_POINTER) */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatGetSubMatrices expects an array of matrices for reuse */
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows of B to fetch = global columns of A with local nonzeros, in ascending order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller (for reuse) or destroy the ones built here */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4674 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4675 4676 Level: developer 4677 4678 */ 4679 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4680 { 4681 VecScatter_MPI_General *gen_to,*gen_from; 4682 PetscErrorCode ierr; 4683 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4684 Mat_SeqAIJ *b_oth; 4685 VecScatter ctx =a->Mvctx; 4686 MPI_Comm comm; 4687 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4688 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4689 PetscScalar *rvalues,*svalues; 4690 MatScalar *b_otha,*bufa,*bufA; 4691 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4692 MPI_Request *rwaits = NULL,*swaits = NULL; 4693 MPI_Status *sstatus,rstatus; 4694 PetscMPIInt jj,size; 4695 PetscInt *cols,sbs,rbs; 4696 PetscScalar *vals; 4697 4698 PetscFunctionBegin; 4699 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4700 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4701 4702 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4703 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4704 } 4705 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4706 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4707 4708 gen_to = (VecScatter_MPI_General*)ctx->todata; 4709 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4710 rvalues = gen_from->values; /* holds the length of receiving row */ 4711 svalues = gen_to->values; /* holds the length of sending row */ 4712 nrecvs = gen_from->n; 4713 nsends = gen_to->n; 4714 4715 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4716 srow = gen_to->indices; /* local row 
index to be sent */ 4717 sstarts = gen_to->starts; 4718 sprocs = gen_to->procs; 4719 sstatus = gen_to->sstatus; 4720 sbs = gen_to->bs; 4721 rstarts = gen_from->starts; 4722 rprocs = gen_from->procs; 4723 rbs = gen_from->bs; 4724 4725 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4726 if (scall == MAT_INITIAL_MATRIX) { 4727 /* i-array */ 4728 /*---------*/ 4729 /* post receives */ 4730 for (i=0; i<nrecvs; i++) { 4731 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4732 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4733 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4734 } 4735 4736 /* pack the outgoing message */ 4737 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4738 4739 sstartsj[0] = 0; 4740 rstartsj[0] = 0; 4741 len = 0; /* total length of j or a array to be sent */ 4742 k = 0; 4743 for (i=0; i<nsends; i++) { 4744 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4745 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4746 for (j=0; j<nrows; j++) { 4747 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4748 for (l=0; l<sbs; l++) { 4749 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4750 4751 rowlen[j*sbs+l] = ncols; 4752 4753 len += ncols; 4754 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4755 } 4756 k++; 4757 } 4758 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4759 4760 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4761 } 4762 /* recvs and sends of i-array are completed */ 4763 i = nrecvs; 4764 while (i--) { 4765 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4766 } 4767 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4768 4769 /* allocate buffers for sending j and a arrays */ 4770 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4771 ierr = 
PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4772 4773 /* create i-array of B_oth */ 4774 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4775 4776 b_othi[0] = 0; 4777 len = 0; /* total length of j or a array to be received */ 4778 k = 0; 4779 for (i=0; i<nrecvs; i++) { 4780 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4781 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4782 for (j=0; j<nrows; j++) { 4783 b_othi[k+1] = b_othi[k] + rowlen[j]; 4784 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4785 k++; 4786 } 4787 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4788 } 4789 4790 /* allocate space for j and a arrrays of B_oth */ 4791 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4792 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4793 4794 /* j-array */ 4795 /*---------*/ 4796 /* post receives of j-array */ 4797 for (i=0; i<nrecvs; i++) { 4798 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4799 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4800 } 4801 4802 /* pack the outgoing message j-array */ 4803 k = 0; 4804 for (i=0; i<nsends; i++) { 4805 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4806 bufJ = bufj+sstartsj[i]; 4807 for (j=0; j<nrows; j++) { 4808 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4809 for (ll=0; ll<sbs; ll++) { 4810 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4811 for (l=0; l<ncols; l++) { 4812 *bufJ++ = cols[l]; 4813 } 4814 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4815 } 4816 } 4817 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4818 } 4819 4820 /* recvs and sends of j-array are completed */ 4821 i = nrecvs; 4822 while (i--) { 4823 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4824 } 4825 if (nsends) {ierr = 
MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4826 } else if (scall == MAT_REUSE_MATRIX) { 4827 sstartsj = *startsj_s; 4828 rstartsj = *startsj_r; 4829 bufa = *bufa_ptr; 4830 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4831 b_otha = b_oth->a; 4832 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4833 4834 /* a-array */ 4835 /*---------*/ 4836 /* post receives of a-array */ 4837 for (i=0; i<nrecvs; i++) { 4838 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4839 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4840 } 4841 4842 /* pack the outgoing message a-array */ 4843 k = 0; 4844 for (i=0; i<nsends; i++) { 4845 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4846 bufA = bufa+sstartsj[i]; 4847 for (j=0; j<nrows; j++) { 4848 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4849 for (ll=0; ll<sbs; ll++) { 4850 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4851 for (l=0; l<ncols; l++) { 4852 *bufA++ = vals[l]; 4853 } 4854 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4855 } 4856 } 4857 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4858 } 4859 /* recvs and sends of a-array are completed */ 4860 i = nrecvs; 4861 while (i--) { 4862 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4863 } 4864 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4865 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4866 4867 if (scall == MAT_INITIAL_MATRIX) { 4868 /* put together the new matrix */ 4869 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4870 4871 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4872 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4873 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4874 b_oth->free_a = PETSC_TRUE; 4875 b_oth->free_ij = PETSC_TRUE; 4876 b_oth->nonew = 0; 4877 4878 ierr = PetscFree(bufj);CHKERRQ(ierr); 4879 if (!startsj_s || !bufa_ptr) { 4880 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4881 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4882 } else { 4883 *startsj_s = sstartsj; 4884 *startsj_r = rstartsj; 4885 *bufa_ptr = bufa; 4886 } 4887 } 4888 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4889 PetscFunctionReturn(0); 4890 } 4891 4892 #undef __FUNCT__ 4893 #define __FUNCT__ "MatGetCommunicationStructs" 4894 /*@C 4895 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4896 4897 Not Collective 4898 4899 Input Parameters: 4900 . A - The matrix in mpiaij format 4901 4902 Output Parameter: 4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4904 . colmap - A map from global column index to local index into lvec 4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4906 4907 Level: developer 4908 4909 @*/ 4910 #if defined(PETSC_USE_CTABLE) 4911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4912 #else 4913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4914 #endif 4915 { 4916 Mat_MPIAIJ *a; 4917 4918 PetscFunctionBegin; 4919 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4920 PetscValidPointer(lvec, 2); 4921 PetscValidPointer(colmap, 3); 4922 PetscValidPointer(multScatter, 4); 4923 a = (Mat_MPIAIJ*) A->data; 4924 if (lvec) *lvec = a->lvec; 4925 if (colmap) *colmap = a->colmap; 4926 if (multScatter) *multScatter = a->Mvctx; 4927 PetscFunctionReturn(0); 4928 } 4929 4930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4931 PETSC_INTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  (              )       (              )         (                  )
m (      A       )  *  n (       B      )   =   m (         C        )
  (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  /* C = (B^T * A^T)^T : the transposed product is computable with the existing
     AIJ*Dense kernels; the temporaries are destroyed as soon as possible */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  /* transpose the result into the caller's preallocated C */
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
/* Creates the (dense) product matrix C with the correct layout; the numeric
   kernel is installed on C for later MatMatMultNumeric() dispatch. */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver: symbolic phase only on MAT_INITIAL_MATRIX, numeric phase always. */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - type constructor registered for MATMPIAIJ: installs the
   MPIAIJ function table, creates the stash that caches entries destined for
   other processes, zeroes the per-type state, and composes the type-specific
   methods that are looked up elsewhere by string name.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  /* allocate the type-specific data and install the MPIAIJ operations table */
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* compose the methods that other code retrieves with PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
   MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
. 
n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* validate arguments: m must be a concrete local size, and both CSR row arrays are 0-based */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* the caller's arrays are used in place (see Notes), so no preallocation pass is needed */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the supplied arrays as the sequential "diagonal" (A) and "off-diagonal" (B) blocks */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* new nonzero locations would not fit the fixed user arrays, so make them an error */
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
5159 } 5160 5161 /* 5162 Special version for direct calls from Fortran 5163 */ 5164 #include <petsc/private/fortranimpl.h> 5165 5166 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5167 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5168 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5169 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5170 #endif 5171 5172 /* Change these macros so can be used in void function */ 5173 #undef CHKERRQ 5174 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5175 #undef SETERRQ2 5176 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5177 #undef SETERRQ3 5178 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5179 #undef SETERRQ 5180 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5181 5182 #undef __FUNCT__ 5183 #define __FUNCT__ "matsetvaluesmpiaij_" 5184 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5185 { 5186 Mat mat = *mmat; 5187 PetscInt m = *mm, n = *mn; 5188 InsertMode addv = *maddv; 5189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5190 PetscScalar value; 5191 PetscErrorCode ierr; 5192 5193 MatCheckPreallocated(mat,1); 5194 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5195 5196 #if defined(PETSC_USE_DEBUG) 5197 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5198 #endif 5199 { 5200 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5201 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5202 PetscBool roworiented = aij->roworiented; 5203 5204 /* Some Variables required in the macro */ 5205 Mat A = aij->A; 5206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5207 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5208 MatScalar *aa = a->a; 5209 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 5210 Mat B = aij->B; 5211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5212 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5213 MatScalar *ba = b->a; 5214 5215 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5216 PetscInt nonew = a->nonew; 5217 MatScalar *ap1,*ap2; 5218 5219 PetscFunctionBegin; 5220 for (i=0; i<m; i++) { 5221 if (im[i] < 0) continue; 5222 #if defined(PETSC_USE_DEBUG) 5223 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5224 #endif 5225 if (im[i] >= rstart && im[i] < rend) { 5226 row = im[i] - rstart; 5227 lastcol1 = -1; 5228 rp1 = aj + ai[row]; 5229 ap1 = aa + ai[row]; 5230 rmax1 = aimax[row]; 5231 nrow1 = ailen[row]; 5232 low1 = 0; 5233 high1 = nrow1; 5234 lastcol2 = -1; 5235 rp2 = bj + bi[row]; 5236 ap2 = ba + bi[row]; 5237 rmax2 = bimax[row]; 5238 nrow2 = bilen[row]; 5239 low2 = 0; 5240 high2 = nrow2; 5241 5242 for (j=0; j<n; j++) { 5243 if (roworiented) value = v[i*n+j]; 5244 else value = v[i+j*m]; 5245 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5246 if (in[j] >= cstart && in[j] < cend) { 5247 col = in[j] - cstart; 5248 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5249 } else if (in[j] < 0) continue; 5250 #if defined(PETSC_USE_DEBUG) 5251 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5252 #endif 5253 else { 5254 if (mat->was_assembled) { 5255 if (!aij->colmap) { 5256 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5257 } 5258 #if defined(PETSC_USE_CTABLE) 5259 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5260 col--; 5261 #else 5262 col = aij->colmap[in[j]] - 1; 5263 #endif 5264 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5265 ierr = 
MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5266 col = in[j]; 5267 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5268 B = aij->B; 5269 b = (Mat_SeqAIJ*)B->data; 5270 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5271 rp2 = bj + bi[row]; 5272 ap2 = ba + bi[row]; 5273 rmax2 = bimax[row]; 5274 nrow2 = bilen[row]; 5275 low2 = 0; 5276 high2 = nrow2; 5277 bm = aij->B->rmap->n; 5278 ba = b->a; 5279 } 5280 } else col = in[j]; 5281 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5282 } 5283 } 5284 } else if (!aij->donotstash) { 5285 if (roworiented) { 5286 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5287 } else { 5288 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5289 } 5290 } 5291 } 5292 } 5293 PetscFunctionReturnVoid(); 5294 } 5295 5296