#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type
   also automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
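/*
   Hypothetical usage sketch for the MATAIJ man page above (illustration only, not part of
   this implementation): create the matrix, then call both preallocation routines so the
   same code works for any communicator size; the routine that does not match the actual
   matrix type is simply ignored.  The preallocation counts below are made-up placeholders
   and error checking is omitted.

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,NULL);          used on a single-process communicator
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   used on a multi-process communicator
     ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/

/* The body below implements a fast path: when Y is assembled and its row and column
   ownership ranges coincide, every diagonal entry lies in the local diagonal block aij->A,
   so MatDiagonalSet() is forwarded to it; otherwise MatDiagonalSet_Default() is used. */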
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
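      /* requested column lies in this process's diagonal block, so read the value from aij->A */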
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
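     (Here "disassemble" roughly means converting the off-diagonal block B back to global
     column numbering so that entries in previously unknown off-process columns can be
     inserted; the reduction below checks whether any process has had to do this, so that
     all processes rebuild their communication pattern consistently.)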
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *owners = A->rmap->range; 787 PetscInt n = A->rmap->n; 788 PetscSF sf; 789 PetscInt *lrows; 790 PetscSFNode *rrows; 791 PetscInt r, p = 0, len = 0; 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 /* Create SF where leaves are input rows and roots are owned rows */ 796 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 797 for (r = 0; r < n; ++r) lrows[r] = -1; 798 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 799 for (r = 0; r < N; ++r) { 800 const PetscInt idx = rows[r]; 801 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 802 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 803 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 804 } 805 if (A->nooffproczerorows) { 806 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 807 lrows[len++] = idx - owners[p]; 808 } else { 809 rrows[r].rank = p; 810 rrows[r].index = rows[r] - owners[p]; 811 } 812 } 813 if (!A->nooffproczerorows) { 814 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 815 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 816 /* Collect flags for rows to be zeroed */ 817 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 818 ierr = 
PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 819 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 820 /* Compress and put in row numbers */ 821 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 822 } 823 /* fix right hand side if needed */ 824 if (x && b) { 825 const PetscScalar *xx; 826 PetscScalar *bb; 827 828 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 829 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 831 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 833 } 834 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 835 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 836 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 837 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 838 } else if (diag != 0.0) { 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 841 for (r = 0; r < len; ++r) { 842 const PetscInt row = lrows[r] + A->rmap->rstart; 843 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 844 } 845 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 846 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 847 } else { 848 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 849 } 850 ierr = PetscFree(lrows);CHKERRQ(ierr); 851 852 /* only change matrix nonzero state if pattern was allowed to be changed */ 853 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 854 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 855 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 865 PetscErrorCode ierr; 866 PetscMPIInt n = A->rmap->n; 867 PetscInt i,j,r,m,p = 0,len = 0; 868 PetscInt *lrows,*owners = A->rmap->range; 869 PetscSFNode *rrows; 870 PetscSF sf; 871 const PetscScalar *xx; 872 PetscScalar *bb,*mask; 873 Vec xmask,lmask; 874 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 875 const PetscInt *aj, *ii,*ridx; 876 PetscScalar *aa; 877 878 PetscFunctionBegin; 879 /* Create SF where leaves are input rows and roots are owned rows */ 880 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 881 for (r = 0; r < n; ++r) lrows[r] = -1; 882 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 883 for (r = 0; r < N; ++r) { 884 const PetscInt idx = rows[r]; 885 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 886 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 887 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 888 } 889 rrows[r].rank = p; 890 rrows[r].index = rows[r] - owners[p]; 891 } 892 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 893 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 894 /* Collect flags for rows to be zeroed */ 895 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 896 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 897 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 898 /* Compress and put in row numbers */ 899 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 900 /* zero diagonal part of matrix */ 901 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 902 /* handle off diagonal part of matrix */ 903 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 904 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 905 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 906 for (i=0; i<len; i++) bb[lrows[i]] = 1; 907 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 908 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 911 if (x) { 912 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 916 } 917 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 918 /* remove zeroed rows of off diagonal matrix */ 919 ii = aij->i; 920 for (i=0; i<len; i++) { 921 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 922 } 923 /* loop over all elements of off process part of matrix zeroing removed columns*/ 924 if (aij->compressedrow.use) { 925 m = aij->compressedrow.nrows; 926 ii = aij->compressedrow.i; 927 ridx = aij->compressedrow.rindex; 928 for (i=0; i<m; i++) { 929 n = ii[i+1] - ii[i]; 930 aj = aij->j + ii[i]; 931 aa = aij->a + ii[i]; 932 933 for (j=0; j<n; j++) { 934 if (PetscAbsScalar(mask[*aj])) { 935 if (b) bb[*ridx] -= *aa*xx[*aj]; 936 *aa = 0.0; 937 } 938 aa++; 939 aj++; 940 } 941 ridx++; 942 } 943 } else { /* do not use compressed row format */ 944 m = l->B->rmap->n; 945 for (i=0; i<m; i++) { 946 n = ii[i+1] - ii[i]; 947 aj = aij->j + ii[i]; 948 aa = aij->a + ii[i]; 949 for (j=0; j<n; j++) { 950 if (PetscAbsScalar(mask[*aj])) { 951 if (b) bb[i] -= *aa*xx[*aj]; 952 *aa = 0.0; 953 } 954 aa++; 955 aj++; 956 } 957 } 958 } 959 if (x) { 960 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 961 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 962 } 963 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 964 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 965 ierr = PetscFree(lrows);CHKERRQ(ierr); 966 967 /* only change matrix nonzero state if pattern was allowed to be changed */ 968 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 969 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 970 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 971 } 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMult_MPIAIJ" 977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 PetscInt nt; 982 983 PetscFunctionBegin; 984 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 985 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 986 ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 988 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 998 PetscErrorCode ierr; 999 1000 PetscFunctionBegin; 1001 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1008 { 1009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1010 PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1023 { 1024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1025 PetscErrorCode ierr; 1026 PetscBool merged; 1027 1028 PetscFunctionBegin; 1029 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1030 /* do nondiagonal part */ 1031 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1032 if (!merged) { 1033 /* send it on its way */ 1034 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1035 /* do local part */ 1036 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1037 /* receive remote parts: note this assumes the values are not actually */ 1038 /* added in yy until the next line, */ 1039 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1040 } else { 1041 /* do local part */ 1042 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1043 /* send it on its way */ 1044 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 /* values actually were received in the Begin() but we need to call this nop */ 1046 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1053 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1054 { 1055 MPI_Comm comm; 1056 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1057 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1058 IS Me,Notme; 1059 PetscErrorCode ierr; 1060 PetscInt M,N,first,last,*notme,i; 1061 PetscMPIInt size; 1062 1063 PetscFunctionBegin; 1064 /* Easy test: symmetric diagonal block */ 1065 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1066 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1067 if (!*f) PetscFunctionReturn(0); 1068 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1070 if (size == 1) PetscFunctionReturn(0); 1071 1072 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
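     Specifically, Aoff is the submatrix of Amat formed by this process's rows and all
     off-process columns, and Boff is the submatrix of Bmat formed by the off-process rows
     and this process's columns; the two are then compared with MatIsTranspose() on each process.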
*/ 1073 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1074 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1076 for (i=0; i<first; i++) notme[i] = i; 1077 for (i=last; i<M; i++) notme[i-last+first] = i; 1078 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1079 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1080 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1081 Aoff = Aoffs[0]; 1082 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1083 Boff = Boffs[0]; 1084 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1085 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1086 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1087 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1088 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1089 ierr = PetscFree(notme);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1096 { 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 /* do nondiagonal part */ 1102 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1103 /* send it on its way */ 1104 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1105 /* do local part */ 1106 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1107 /* receive remote parts */ 1108 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* 1113 This only works correctly for square matrices where the subblock A->A is the 1114 diagonal block 1115 */ 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1119 { 1120 PetscErrorCode ierr; 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 1123 PetscFunctionBegin; 1124 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1125 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1126 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatScale_MPIAIJ" 1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1139 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatDestroy_MPIAIJ" 1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 #if defined(PETSC_USE_LOG) 1152 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1153 #endif 1154 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1156 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1157 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1158 #if defined(PETSC_USE_CTABLE) 1159 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1160 #else 1161 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1162 #endif 1163 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1164 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1165 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1166 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1167 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1168 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1169 1170 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1190 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1191 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1192 PetscErrorCode ierr; 1193 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1194 int fd; 1195 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1196 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1197 PetscScalar *column_values; 1198 PetscInt message_count,flowcontrolcount; 1199 FILE *file; 1200 1201 PetscFunctionBegin; 1202 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1203 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1204 nz = A->nz + B->nz; 1205 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1206 if (!rank) { 1207 header[0] = MAT_FILE_CLASSID; 1208 header[1] = mat->rmap->N; 1209 header[2] = mat->cmap->N; 1210 1211 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1213 /* get largest number of rows any processor has */ 1214 rlen = mat->rmap->n; 1215 range = mat->rmap->range; 1216 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1217 } else { 1218 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 rlen = mat->rmap->n; 1220 } 1221 1222 /* load up the local row counts */ 1223 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1224 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1225 1226 /* store the row lengths to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1230 for (i=1; i<size; i++) { 1231 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1232 rlen = range[i+1] - range[i]; 1233 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1235 } 1236 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1237 } else { 1238 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1239 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1240 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1241 } 1242 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1243 1244 /* load up the local column indices */ 1245 nzmax = nz; /* th processor needs space a largest processor needs */ 1246 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1248 cnt = 0; 1249 for (i=0; i<mat->rmap->n; i++) { 1250 for (j=B->i[i]; j<B->i[i+1]; j++) { 1251 if ((col = garray[B->j[j]]) > cstart) break; 1252 column_indices[cnt++] = col; 1253 } 1254 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1255 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1256 } 1257 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1258 1259 /* store the column indices to the file */ 1260 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1261 if (!rank) { 1262 MPI_Status status; 1263 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 for (i=1; i<size; i++) { 1265 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1266 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1267 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1268 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1270 } 1271 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1272 } else { 1273 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1274 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1275 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1277 } 1278 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1279 1280 /* load up the local column values */ 1281 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1282 cnt = 0; 1283 for (i=0; i<mat->rmap->n; i++) { 1284 for (j=B->i[i]; j<B->i[i+1]; j++) { 1285 if (garray[B->j[j]] > cstart) break; 1286 column_values[cnt++] = B->a[j]; 1287 } 1288 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1289 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1290 } 1291 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1292 1293 /* store the column values to the file */ 1294 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1295 if (!rank) { 1296 MPI_Status status; 1297 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1298 for (i=1; i<size; i++) { 1299 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1300 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1301 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1302 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1304 } 1305 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1308 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1309 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1310 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1311 } 1312 ierr = PetscFree(column_values);CHKERRQ(ierr); 1313 1314 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1315 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #include <petscdraw.h> 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1323 { 1324 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1325 PetscErrorCode ierr; 1326 PetscMPIInt rank = aij->rank,size = aij->size; 1327 PetscBool isdraw,iascii,isbinary; 1328 PetscViewer sviewer; 1329 PetscViewerFormat format; 1330 1331 PetscFunctionBegin; 1332 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1333 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1334 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1335 if (iascii) { 1336 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1337 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1338 MatInfo info; 1339 PetscBool inodes; 1340 1341 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1342 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1343 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1344 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1345 if (!inodes) { 1346 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1347 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1348 } else { 1349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1350 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1351 } 1352 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1353 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1354 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1355 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1356 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1357 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1358 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1359 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1362 PetscInt inodecount,inodelimit,*inodes; 1363 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1364 if (inodes) { 1365 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1366 } else { 1367 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1368 } 1369 PetscFunctionReturn(0); 1370 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1371 PetscFunctionReturn(0); 1372 } 1373 } else if (isbinary) { 1374 if (size == 1) { 1375 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1376 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1377 } else { 1378 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } else if (isdraw) { 1382 PetscDraw draw; 1383 PetscBool isnull; 1384 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1385 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1386 if (isnull) PetscFunctionReturn(0); 1387 } 1388 1389 { 1390 /* assemble the entire matrix onto first processor. */ 1391 Mat A; 1392 Mat_SeqAIJ *Aloc; 1393 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1394 MatScalar *a; 1395 1396 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1397 if (!rank) { 1398 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1399 } else { 1400 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1401 } 1402 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1403 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1404 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1405 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1406 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1407 1408 /* copy over the A part */ 1409 Aloc = (Mat_SeqAIJ*)aij->A->data; 1410 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1411 row = mat->rmap->rstart; 1412 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1413 for (i=0; i<m; i++) { 1414 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1415 row++; 1416 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1417 } 1418 aj = Aloc->j; 1419 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1420 1421 /* copy over the B part */ 1422 Aloc = (Mat_SeqAIJ*)aij->B->data; 1423 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1424 row = mat->rmap->rstart; 1425 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1426 ct = cols; 1427 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1428 for (i=0; i<m; i++) { 1429 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1430 row++; 1431 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1432 } 1433 ierr = PetscFree(ct);CHKERRQ(ierr); 1434 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 /* 1437 
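     (At this point the whole parallel matrix has been gathered into the temporary matrix A,
     whose entries all reside on process 0, purely for viewing.)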
Everyone has to call to draw the matrix since the graphics waits are 1438 synchronized across all processors that share the PetscDraw object 1439 */ 1440 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1441 if (!rank) { 1442 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1443 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1444 } 1445 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1446 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1447 ierr = MatDestroy(&A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatView_MPIAIJ" 1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1455 { 1456 PetscErrorCode ierr; 1457 PetscBool iascii,isdraw,issocket,isbinary; 1458 1459 PetscFunctionBegin; 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1464 if (iascii || isdraw || isbinary || issocket) { 1465 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatSOR_MPIAIJ" 1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1473 { 1474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1475 PetscErrorCode ierr; 1476 Vec bb1 = 0; 1477 PetscBool hasop; 1478 1479 PetscFunctionBegin; 1480 if (flag == SOR_APPLY_UPPER) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1486 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1487 } 1488 1489 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1490 if (flag & SOR_ZERO_INITIAL_GUESS) { 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1492 its--; 1493 } 1494 1495 while (its--) { 1496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 1499 /* update rhs: bb1 = bb - B*x */ 1500 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1502 1503 /* local sweep */ 1504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1505 } 1506 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 while (its--) { 1512 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 1515 /* update rhs: bb1 = bb - B*x */ 1516 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1517 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1518 1519 /* local sweep */ 1520 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1521 } 1522 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1523 if (flag & SOR_ZERO_INITIAL_GUESS) { 1524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1525 its--; 1526 } 1527 while (its--) { 1528 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 1531 /* update rhs: bb1 = bb - B*x */ 1532 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1533 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1534 1535 /* local sweep */ 1536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1537 } 1538 } else if (flag & SOR_EISENSTAT) { 1539 Vec xx1; 1540 1541 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1543 1544 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 if (!mat->diag) { 1547 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1548 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1549 } 1550 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1551 if (hasop) { 1552 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1553 } else { 1554 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1555 } 1556 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1557 1558 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1559 1560 /* local sweep */ 1561 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1562 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1563 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1564 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1565 1566 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1567 1568 matin->errortype = mat->A->errortype; 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatPermute_MPIAIJ" 1574 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1575 { 1576 Mat aA,aB,Aperm; 1577 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1578 PetscScalar *aa,*ba; 1579 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1580 PetscSF rowsf,sf; 1581 IS parcolp = NULL; 1582 PetscBool done; 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1587 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1588 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1589 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1590 1591 /* Invert row permutation to find out where my rows should go */ 1592 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1593 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1594 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1595 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1596 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1597 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1598 1599 /* 
Invert column permutation to find out where my columns should go */ 1600 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1601 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1602 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1603 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1604 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1605 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1607 1608 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1609 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1610 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1611 1612 /* Find out where my gcols should go */ 1613 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1614 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1615 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1616 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1617 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1618 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1619 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1620 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1621 1622 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1623 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1624 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1625 for (i=0; i<m; i++) { 1626 PetscInt row = rdest[i],rowner; 1627 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1628 for (j=ai[i]; j<ai[i+1]; j++) { 1629 PetscInt cowner,col = cdest[aj[j]]; 1630 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1631 if (rowner == cowner) dnnz[i]++; 1632 else onnz[i]++; 1633 } 1634 for (j=bi[i]; j<bi[i+1]; j++) { 1635 PetscInt cowner,col = gcdest[bj[j]]; 1636 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1637 if (rowner == cowner) dnnz[i]++; 1638 else onnz[i]++; 1639 } 1640 } 1641 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1642 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1643 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1644 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1645 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1646 1647 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1648 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1649 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1650 for (i=0; i<m; i++) { 1651 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1652 PetscInt j0,rowlen; 1653 rowlen = ai[i+1] - ai[i]; 1654 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1655 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1656 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 rowlen = bi[i+1] - bi[i]; 1659 for (j0=j=0; j<rowlen; j0=j) { 1660 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1661 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1662 } 1663 } 1664 ierr = 
MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1665 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1667 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1668 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1669 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1670 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1671 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1672 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1673 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1674 *B = Aperm; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1680 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1681 { 1682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1687 if (ghosts) *ghosts = aij->garray; 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1693 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1694 { 1695 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1696 Mat A = mat->A,B = mat->B; 1697 PetscErrorCode ierr; 1698 PetscReal isend[5],irecv[5]; 1699 1700 PetscFunctionBegin; 1701 info->block_size = 1.0; 1702 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1703 1704 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1705 isend[3] = info->memory; isend[4] = info->mallocs; 1706 1707 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1708 1709 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1710 isend[3] += info->memory; isend[4] += info->mallocs; 1711 if (flag == MAT_LOCAL) { 1712 info->nz_used = isend[0]; 1713 info->nz_allocated = isend[1]; 1714 info->nz_unneeded = isend[2]; 1715 info->memory = isend[3]; 1716 info->mallocs = isend[4]; 1717 } else if (flag == MAT_GLOBAL_MAX) { 1718 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1719 1720 info->nz_used = irecv[0]; 1721 info->nz_allocated = irecv[1]; 1722 info->nz_unneeded = irecv[2]; 1723 info->memory = irecv[3]; 1724 info->mallocs = irecv[4]; 1725 } else if (flag == MAT_GLOBAL_SUM) { 1726 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1727 1728 info->nz_used = irecv[0]; 1729 info->nz_allocated = irecv[1]; 1730 info->nz_unneeded = irecv[2]; 1731 info->memory = irecv[3]; 1732 info->mallocs = irecv[4]; 1733 } 1734 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1735 info->fill_ratio_needed = 0; 1736 info->factor_mallocs = 0; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatSetOption_MPIAIJ" 1742 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1743 { 1744 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 switch (op) { 1749 case MAT_NEW_NONZERO_LOCATIONS: 1750 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1751 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1752 case MAT_KEEP_NONZERO_PATTERN: 1753 case MAT_NEW_NONZERO_LOCATION_ERR: 1754 case MAT_USE_INODES: 1755 case MAT_IGNORE_ZERO_ENTRIES: 1756 MatCheckPreallocated(A,1); 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 ierr = 
MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_ROW_ORIENTED: 1761 MatCheckPreallocated(A,1); 1762 a->roworiented = flg; 1763 1764 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1765 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1766 break; 1767 case MAT_NEW_DIAGONALS: 1768 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1769 break; 1770 case MAT_IGNORE_OFF_PROC_ENTRIES: 1771 a->donotstash = flg; 1772 break; 1773 case MAT_SPD: 1774 A->spd_set = PETSC_TRUE; 1775 A->spd = flg; 1776 if (flg) { 1777 A->symmetric = PETSC_TRUE; 1778 A->structurally_symmetric = PETSC_TRUE; 1779 A->symmetric_set = PETSC_TRUE; 1780 A->structurally_symmetric_set = PETSC_TRUE; 1781 } 1782 break; 1783 case MAT_SYMMETRIC: 1784 MatCheckPreallocated(A,1); 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 case MAT_STRUCTURALLY_SYMMETRIC: 1788 MatCheckPreallocated(A,1); 1789 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1790 break; 1791 case MAT_HERMITIAN: 1792 MatCheckPreallocated(A,1); 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_SYMMETRY_ETERNAL: 1796 MatCheckPreallocated(A,1); 1797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1798 break; 1799 default: 1800 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatGetRow_MPIAIJ" 1807 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1808 { 1809 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1810 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1811 PetscErrorCode ierr; 1812 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1813 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1814 PetscInt *cmap,*idx_p; 1815 1816 PetscFunctionBegin; 1817 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1818 mat->getrowactive = PETSC_TRUE; 1819 1820 if (!mat->rowvalues && (idx || v)) { 1821 /* 1822 allocate enough space to hold information from the longest row. 
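The workspace is sized by the largest combined row length over the diagonal (A) and off-diagonal (B) parts, computed in the loop below, so one pair of arrays (rowvalues/rowindices) can be reused for every later MatGetRow() call on this process.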
1823 */ 1824 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1825 PetscInt max = 1,tmp; 1826 for (i=0; i<matin->rmap->n; i++) { 1827 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1828 if (max < tmp) max = tmp; 1829 } 1830 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1831 } 1832 1833 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1834 lrow = row - rstart; 1835 1836 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1837 if (!v) {pvA = 0; pvB = 0;} 1838 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1839 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1840 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1841 nztot = nzA + nzB; 1842 1843 cmap = mat->garray; 1844 if (v || idx) { 1845 if (nztot) { 1846 /* Sort by increasing column numbers, assuming A and B already sorted */ 1847 PetscInt imark = -1; 1848 if (v) { 1849 *v = v_p = mat->rowvalues; 1850 for (i=0; i<nzB; i++) { 1851 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1852 else break; 1853 } 1854 imark = i; 1855 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1856 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1857 } 1858 if (idx) { 1859 *idx = idx_p = mat->rowindices; 1860 if (imark > -1) { 1861 for (i=0; i<imark; i++) { 1862 idx_p[i] = cmap[cworkB[i]]; 1863 } 1864 } else { 1865 for (i=0; i<nzB; i++) { 1866 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1867 else break; 1868 } 1869 imark = i; 1870 } 1871 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1872 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1873 } 1874 } else { 1875 if (idx) *idx = 0; 1876 if (v) *v = 0; 1877 } 1878 } 1879 *nz = nztot; 1880 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1881 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1887 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1888 { 1889 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1890 1891 PetscFunctionBegin; 1892 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1893 aij->getrowactive = PETSC_FALSE; 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatNorm_MPIAIJ" 1899 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1900 { 1901 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1902 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1903 PetscErrorCode ierr; 1904 PetscInt i,j,cstart = mat->cmap->rstart; 1905 PetscReal sum = 0.0; 1906 MatScalar *v; 1907 1908 PetscFunctionBegin; 1909 if (aij->size == 1) { 1910 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1911 } else { 1912 if (type == NORM_FROBENIUS) { 1913 v = amat->a; 1914 for (i=0; i<amat->nz; i++) { 1915 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1916 } 1917 v = bmat->a; 1918 for (i=0; i<bmat->nz; i++) { 1919 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1920 } 1921 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1922 *norm = PetscSqrtReal(*norm); 1923 PetscLogFlops(2*amat->nz+2*bmat->nz-1); 1924 } else if (type == NORM_1) { /* max column norm */ 1925 PetscReal *tmp,*tmp2; 1926 PetscInt *jj,*garray = aij->garray; 1927 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1928 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1929 *norm = 0.0; 1930 v = amat->a; jj = amat->j; 1931 for (j=0; j<amat->nz; j++) { 1932 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1933 } 1934 v = bmat->a; jj = bmat->j; 1935 for (j=0; j<bmat->nz; j++) { 1936 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1937 } 1938 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1939 for (j=0; j<mat->cmap->N; j++) { 1940 if (tmp2[j] > *norm) *norm = tmp2[j]; 1941 } 1942 ierr = PetscFree(tmp);CHKERRQ(ierr); 1943 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1944 PetscLogFlops(amat->nz+bmat->nz-1); 1945 } else if (type == NORM_INFINITY) { /* max row norm */ 1946 PetscReal ntemp = 0.0; 1947 for (j=0; j<aij->A->rmap->n; j++) { 1948 v = amat->a + amat->i[j]; 1949 sum = 0.0; 1950 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1951 sum += PetscAbsScalar(*v); v++; 1952 } 1953 v = bmat->a + bmat->i[j]; 1954 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1955 sum += PetscAbsScalar(*v); v++; 1956 } 1957 if (sum > ntemp) ntemp = sum; 1958 } 1959 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1960 PetscLogFlops(amat->nz+bmat->nz-1); 1961 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatTranspose_MPIAIJ" 1968 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1969 { 1970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1971 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1972 PetscErrorCode ierr; 1973 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1974 PetscInt cstart = A->cmap->rstart,ncol; 1975 Mat B; 1976 MatScalar *array; 1977 1978 PetscFunctionBegin; 1979 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1980 1981 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1982 ai = Aloc->i; aj = Aloc->j; 1983 bi = Bloc->i; bj = Bloc->j; 1984 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1985 PetscInt *d_nnz,*g_nnz,*o_nnz; 1986 PetscSFNode *oloc; 1987 PETSC_UNUSED PetscSF sf; 1988 1989 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1990 /* compute d_nnz for preallocation */ 1991 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (i=0; i<ai[ma]; i++) { 1993 d_nnz[aj[i]]++; 1994 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1995 } 1996 /* compute local off-diagonal contributions */ 1997 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1998 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1999 /* map those to global */ 2000 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2001 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2002 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2003 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2004 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2005 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2006 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2007 2008 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2009 ierr = 
MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2010 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2011 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2012 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2013 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2014 } else { 2015 B = *matout; 2016 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2017 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2018 } 2019 2020 /* copy over the A part */ 2021 array = Aloc->a; 2022 row = A->rmap->rstart; 2023 for (i=0; i<ma; i++) { 2024 ncol = ai[i+1]-ai[i]; 2025 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2026 row++; 2027 array += ncol; aj += ncol; 2028 } 2029 aj = Aloc->j; 2030 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2031 2032 /* copy over the B part */ 2033 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2034 array = Bloc->a; 2035 row = A->rmap->rstart; 2036 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2037 cols_tmp = cols; 2038 for (i=0; i<mb; i++) { 2039 ncol = bi[i+1]-bi[i]; 2040 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2041 row++; 2042 array += ncol; cols_tmp += ncol; 2043 } 2044 ierr = PetscFree(cols);CHKERRQ(ierr); 2045 2046 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2047 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2048 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2049 *matout = B; 2050 } else { 2051 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2052 } 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2059 { 2060 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2061 Mat a = aij->A,b = aij->B; 2062 PetscErrorCode ierr; 2063 PetscInt s1,s2,s3; 2064 2065 PetscFunctionBegin; 2066 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2067 if (rr) { 2068 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2069 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2070 /* Overlap communication with computation. 
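The scatter of rr into the ghosted vector lvec is started here; the purely local work (left-scaling of the off-diagonal block and scaling of the diagonal block) proceeds while those messages are in flight, and the scatter is completed below just before the off-diagonal block is right-scaled with the received ghost values.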
*/ 2071 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 if (ll) { 2074 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2075 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2076 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2077 } 2078 /* scale the diagonal block */ 2079 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2080 2081 if (rr) { 2082 /* Do a scatter end and then right scale the off-diagonal block */ 2083 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2091 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2092 { 2093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatEqual_MPIAIJ" 2103 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2104 { 2105 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2106 Mat a,b,c,d; 2107 PetscBool flg; 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 a = matA->A; b = matA->B; 2112 c = matB->A; d = matB->B; 2113 2114 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2115 if (flg) { 2116 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2117 } 2118 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatCopy_MPIAIJ" 2124 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2125 { 2126 PetscErrorCode ierr; 2127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2128 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2129 2130 PetscFunctionBegin; 2131 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2132 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2133 /* because of the column compression in the off-processor part of the matrix a->B, 2134 the number of columns in a->B and b->B may be different, hence we cannot call 2135 the MatCopy() directly on the two parts. If need be, we can provide a more 2136 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2137 then copying the submatrices */ 2138 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2139 } else { 2140 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2141 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2142 } 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "MatSetUp_MPIAIJ" 2148 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2149 { 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 /* 2158 Computes the number of nonzeros per row needed for preallocation when X and Y 2159 have different nonzero structure. 
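The count for each row is the size of the union of the two (sorted) global column sets, obtained with a two-pointer merge. For example, if row i of X has global columns {0,3,7} and the same row of Y has {3,5,7,9}, the union is {0,3,5,7,9} and nnz[i] = 5.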
2160 */ 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2163 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2164 { 2165 PetscInt i,j,k,nzx,nzy; 2166 2167 PetscFunctionBegin; 2168 /* Set the number of nonzeros in the new matrix */ 2169 for (i=0; i<m; i++) { 2170 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2171 nzx = xi[i+1] - xi[i]; 2172 nzy = yi[i+1] - yi[i]; 2173 nnz[i] = 0; 2174 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2175 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2176 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2177 nnz[i]++; 2178 } 2179 for (; k<nzy; k++) nnz[i]++; 2180 } 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2187 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2188 { 2189 PetscErrorCode ierr; 2190 PetscInt m = Y->rmap->N; 2191 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2192 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2193 2194 PetscFunctionBegin; 2195 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatAXPY_MPIAIJ" 2201 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2202 { 2203 PetscErrorCode ierr; 2204 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2205 PetscBLASInt bnz,one=1; 2206 Mat_SeqAIJ *x,*y; 2207 2208 PetscFunctionBegin; 2209 if (str == SAME_NONZERO_PATTERN) { 2210 PetscScalar alpha = a; 2211 x = (Mat_SeqAIJ*)xx->A->data; 2212 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2213 y = (Mat_SeqAIJ*)yy->A->data; 2214 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2215 x = (Mat_SeqAIJ*)xx->B->data; 2216 y = (Mat_SeqAIJ*)yy->B->data; 2217 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2218 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2219 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2220 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2221 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2222 } else { 2223 Mat B; 2224 PetscInt *nnz_d,*nnz_o; 2225 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2226 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2227 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2228 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2229 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2230 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2231 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2232 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2233 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2234 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2235 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2236 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2237 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2238 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2239 } 2240 
PetscFunctionReturn(0); 2241 } 2242 2243 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatConjugate_MPIAIJ" 2247 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2248 { 2249 #if defined(PETSC_USE_COMPLEX) 2250 PetscErrorCode ierr; 2251 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2252 2253 PetscFunctionBegin; 2254 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2255 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2256 #else 2257 PetscFunctionBegin; 2258 #endif 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatRealPart_MPIAIJ" 2264 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2265 { 2266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2267 PetscErrorCode ierr; 2268 2269 PetscFunctionBegin; 2270 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2271 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2277 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2278 { 2279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2284 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2290 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2291 { 2292 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2293 PetscErrorCode ierr; 2294 PetscInt i,*idxb = 0; 2295 PetscScalar *va,*vb; 2296 Vec vtmp; 2297 2298 PetscFunctionBegin; 2299 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2300 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2301 if (idx) { 2302 for (i=0; i<A->rmap->n; i++) { 2303 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2304 } 2305 } 2306 2307 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2308 if (idx) { 2309 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2310 } 2311 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2312 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2313 2314 for (i=0; i<A->rmap->n; i++) { 2315 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2316 va[i] = vb[i]; 2317 if (idx) idx[i] = a->garray[idxb[i]]; 2318 } 2319 } 2320 2321 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2322 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2323 ierr = PetscFree(idxb);CHKERRQ(ierr); 2324 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2330 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2331 { 2332 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2333 PetscErrorCode ierr; 2334 PetscInt i,*idxb = 0; 2335 PetscScalar *va,*vb; 2336 Vec vtmp; 2337 2338 PetscFunctionBegin; 2339 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2340 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2341 if (idx) { 2342 for (i=0; i<A->cmap->n; i++) { 2343 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2344 } 2345 } 2346 2347 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2348 if (idx) { 2349 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2350 } 2351 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2352 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2353 2354 for (i=0; i<A->rmap->n; i++) { 2355 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2356 va[i] = vb[i]; 2357 if (idx) idx[i] = a->garray[idxb[i]]; 2358 } 2359 } 2360 2361 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2362 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2363 ierr = PetscFree(idxb);CHKERRQ(ierr); 2364 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2370 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2371 { 2372 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2373 PetscInt n = A->rmap->n; 2374 PetscInt cstart = A->cmap->rstart; 2375 PetscInt *cmap = mat->garray; 2376 PetscInt *diagIdx, *offdiagIdx; 2377 Vec diagV, offdiagV; 2378 PetscScalar *a, *diagA, *offdiagA; 2379 PetscInt r; 2380 PetscErrorCode ierr; 2381 2382 PetscFunctionBegin; 2383 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2384 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2385 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2386 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2387 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2388 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2389 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2390 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2391 for (r = 0; r < n; ++r) { 2392 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2393 a[r] = diagA[r]; 2394 idx[r] = cstart + diagIdx[r]; 2395 } else { 2396 a[r] = offdiagA[r]; 2397 idx[r] = cmap[offdiagIdx[r]]; 2398 } 2399 } 2400 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2401 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2402 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2403 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2404 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2405 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2411 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2412 { 2413 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2414 PetscInt n = A->rmap->n; 2415 PetscInt cstart = A->cmap->rstart; 2416 PetscInt *cmap = mat->garray; 2417 PetscInt *diagIdx, *offdiagIdx; 2418 Vec diagV, offdiagV; 2419 PetscScalar *a, *diagA, *offdiagA; 2420 PetscInt r; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2425 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2426 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2427 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2428 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2429 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2430 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2431 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2432 for (r = 0; r < n; ++r) { 2433 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2434 a[r] = diagA[r]; 2435 idx[r] = cstart + diagIdx[r]; 2436 } else { 2437 a[r] = offdiagA[r]; 2438 idx[r] = cmap[offdiagIdx[r]]; 2439 } 2440 } 2441 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2443 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2444 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2445 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2446 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2453 { 2454 PetscErrorCode ierr; 
2455 Mat *dummy; 2456 2457 PetscFunctionBegin; 2458 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2459 *newmat = *dummy; 2460 ierr = PetscFree(dummy);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2466 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2467 { 2468 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2469 PetscErrorCode ierr; 2470 2471 PetscFunctionBegin; 2472 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2473 A->errortype = a->A->errortype; 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2479 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2480 { 2481 PetscErrorCode ierr; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2483 2484 PetscFunctionBegin; 2485 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2486 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2487 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2495 { 2496 PetscFunctionBegin; 2497 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2498 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2504 /*@ 2505 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2506 2507 Collective on Mat 2508 2509 Input Parameters: 2510 + A - the matrix 2511 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2512 2513 Level: advanced 2514 2515 @*/ 2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2517 { 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2527 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2528 { 2529 PetscErrorCode ierr; 2530 PetscBool sc = PETSC_FALSE,flg; 2531 2532 PetscFunctionBegin; 2533 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2534 ierr = PetscObjectOptionsBegin((PetscObject)A); 2535 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2536 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2537 if (flg) { 2538 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2539 } 2540 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatShift_MPIAIJ" 2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2547 { 2548 PetscErrorCode ierr; 2549 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2550 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2551 2552 PetscFunctionBegin; 2553 if (!Y->preallocated) { 2554 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2555 } else if (!aij->nz) { 2556 
PetscInt nonew = aij->nonew; 2557 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2558 aij->nonew = nonew; 2559 } 2560 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2566 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2567 { 2568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2573 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2574 if (d) { 2575 PetscInt rstart; 2576 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2577 *d += rstart; 2578 2579 } 2580 PetscFunctionReturn(0); 2581 } 2582 2583 2584 /* -------------------------------------------------------------------*/ 2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2586 MatGetRow_MPIAIJ, 2587 MatRestoreRow_MPIAIJ, 2588 MatMult_MPIAIJ, 2589 /* 4*/ MatMultAdd_MPIAIJ, 2590 MatMultTranspose_MPIAIJ, 2591 MatMultTransposeAdd_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*29*/ MatSetUp_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatGetSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 MatShift_MPIAIJ, 2632 MatDiagonalSet_MPIAIJ, 2633 MatZeroRowsColumns_MPIAIJ, 2634 /*49*/ MatSetRandom_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIXAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatGetSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2649 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2650 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 MatSetColoring_MPIAIJ, 2658 0, 2659 MatSetValuesAdifor_MPIAIJ, 2660 /*75*/ MatFDColoringApply_AIJ, 2661 MatSetFromOptions_MPIAIJ, 2662 0, 2663 0, 2664 MatFindZeroDiagonals_MPIAIJ, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_MPIAIJ_MPIAIJ, 2678 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*99*/ 0, 2685 0, 2686 0, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 0, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 MatMissingDiagonal_MPIAIJ, 2699 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2700 0, 2701 MatGetGhosts_MPIAIJ, 2702 0, 2703 0, 2704 /*119*/0, 2705 0, 2706 0, 2707 0, 2708 MatGetMultiProcBlock_MPIAIJ, 2709 /*124*/MatFindNonzeroRows_MPIAIJ, 2710 MatGetColumnNorms_MPIAIJ, 2711 MatInvertBlockDiagonal_MPIAIJ, 2712 0, 2713 
MatGetSubMatricesMPI_MPIAIJ, 2714 /*129*/0, 2715 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2716 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2718 0, 2719 /*134*/0, 2720 0, 2721 0, 2722 0, 2723 0, 2724 /*139*/0, 2725 0, 2726 0, 2727 MatFDColoringSetUp_MPIXAIJ, 2728 MatFindOffBlockDiagonalEntries_MPIAIJ, 2729 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2730 }; 2731 2732 /* ----------------------------------------------------------------------------------------*/ 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2736 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2737 { 2738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2739 PetscErrorCode ierr; 2740 2741 PetscFunctionBegin; 2742 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2743 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2749 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2750 { 2751 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2756 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2762 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2763 { 2764 Mat_MPIAIJ *b; 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2770 b = (Mat_MPIAIJ*)B->data; 2771 2772 if (!B->preallocated) { 2773 /* Explicitly create 2 MATSEQAIJ matrices. 
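b->A is the "diagonal" block (local rows by local columns) and b->B is the "off-diagonal" block; b->B is created here with the full global column width and is compressed during assembly to only the ghost columns actually referenced, with garray recording the resulting local-to-global column map. The d_nz/d_nnz arguments below preallocate b->A and the o_nz/o_nnz arguments preallocate b->B; as a rough caller-side illustration (not taken from this file), a user expecting at most 5 nonzeros per row in the local diagonal block and 2 per row coupling to other processes might call MatMPIAIJSetPreallocation(B,5,NULL,2,NULL).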
*/ 2774 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2775 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2776 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2777 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2778 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2781 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2782 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2784 } 2785 2786 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2787 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2788 B->preallocated = PETSC_TRUE; 2789 PetscFunctionReturn(0); 2790 } 2791 2792 #undef __FUNCT__ 2793 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2794 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2795 { 2796 Mat mat; 2797 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 *newmat = 0; 2802 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2803 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2804 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2805 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2806 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2807 a = (Mat_MPIAIJ*)mat->data; 2808 2809 mat->factortype = matin->factortype; 2810 mat->assembled = PETSC_TRUE; 2811 mat->insertmode = NOT_SET_VALUES; 2812 mat->preallocated = PETSC_TRUE; 2813 2814 a->size = oldmat->size; 2815 a->rank = oldmat->rank; 2816 a->donotstash = oldmat->donotstash; 2817 a->roworiented = oldmat->roworiented; 2818 a->rowindices = 0; 2819 a->rowvalues = 0; 2820 a->getrowactive = PETSC_FALSE; 2821 2822 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2823 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2824 2825 if (oldmat->colmap) { 2826 #if defined(PETSC_USE_CTABLE) 2827 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2828 #else 2829 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2830 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2831 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2832 #endif 2833 } else a->colmap = 0; 2834 if (oldmat->garray) { 2835 PetscInt len; 2836 len = oldmat->B->cmap->n; 2837 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2838 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2839 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2840 } else a->garray = 0; 2841 2842 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2844 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2846 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2848 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2849 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2850 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2851 *newmat = mat; 2852 PetscFunctionReturn(0); 2853 } 2854 2855 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatLoad_MPIAIJ" 2859 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2860 { 2861 PetscScalar *vals,*svals; 2862 MPI_Comm comm; 2863 PetscErrorCode ierr; 2864 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2865 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2866 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2867 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2868 PetscInt cend,cstart,n,*rowners; 2869 int fd; 2870 PetscInt bs = newMat->rmap->bs; 2871 2872 PetscFunctionBegin; 2873 /* force binary viewer to load .info file if it has not yet done so */ 2874 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2875 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2878 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2879 if (!rank) { 2880 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2881 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2882 } 2883 2884 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2885 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2886 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2887 if (bs < 0) bs = 1; 2888 2889 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2890 M = header[1]; N = header[2]; 2891 2892 /* If global sizes are set, check if they are consistent with that given in the file */ 2893 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2894 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2895 2896 /* determine ownership of all (block) rows */ 2897 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2898 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2899 else m = newMat->rmap->n; /* Set by user */ 2900 2901 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2902 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2903 2904 /* First process needs enough room for process with most rows */ 2905 if (!rank) { 2906 mmax = rowners[1]; 2907 for (i=2; i<=size; i++) { 2908 mmax = PetscMax(mmax, rowners[i]); 2909 } 2910 } else mmax = -1; /* unused, but compilers complain */ 2911 2912 rowners[0] = 0; 2913 for (i=2; i<=size; i++) { 2914 rowners[i] += rowners[i-1]; 2915 } 2916 rstart = rowners[rank]; 2917 rend = rowners[rank+1]; 2918 2919 /* distribute row lengths to all processors */ 2920 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2921 if (!rank) { 2922 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2923 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2924 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2925 for (j=0; j<m; j++) { 
2926 procsnz[0] += ourlens[j]; 2927 } 2928 for (i=1; i<size; i++) { 2929 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2930 /* calculate the number of nonzeros on each processor */ 2931 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2932 procsnz[i] += rowlengths[j]; 2933 } 2934 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2935 } 2936 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2937 } else { 2938 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2939 } 2940 2941 if (!rank) { 2942 /* determine max buffer needed and allocate it */ 2943 maxnz = 0; 2944 for (i=0; i<size; i++) { 2945 maxnz = PetscMax(maxnz,procsnz[i]); 2946 } 2947 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2948 2949 /* read in my part of the matrix column indices */ 2950 nz = procsnz[0]; 2951 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2952 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2953 2954 /* read in every one elses and ship off */ 2955 for (i=1; i<size; i++) { 2956 nz = procsnz[i]; 2957 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2958 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2959 } 2960 ierr = PetscFree(cols);CHKERRQ(ierr); 2961 } else { 2962 /* determine buffer space needed for message */ 2963 nz = 0; 2964 for (i=0; i<m; i++) { 2965 nz += ourlens[i]; 2966 } 2967 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2968 2969 /* receive message of column indices*/ 2970 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2971 } 2972 2973 /* determine column ownership if matrix is not square */ 2974 if (N != M) { 2975 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2976 else n = newMat->cmap->n; 2977 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2978 cstart = cend - n; 2979 } else { 2980 cstart = rstart; 2981 cend = rend; 2982 n = cend - cstart; 2983 } 2984 2985 /* loop over local rows, determining number of off diagonal entries */ 2986 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2987 jj = 0; 2988 for (i=0; i<m; i++) { 2989 for (j=0; j<ourlens[i]; j++) { 2990 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2991 jj++; 2992 } 2993 } 2994 2995 for (i=0; i<m; i++) { 2996 ourlens[i] -= offlens[i]; 2997 } 2998 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2999 3000 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3001 3002 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3003 3004 for (i=0; i<m; i++) { 3005 ourlens[i] += offlens[i]; 3006 } 3007 3008 if (!rank) { 3009 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3010 3011 /* read in my part of the matrix numerical values */ 3012 nz = procsnz[0]; 3013 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3014 3015 /* insert into matrix */ 3016 jj = rstart; 3017 smycols = mycols; 3018 svals = vals; 3019 for (i=0; i<m; i++) { 3020 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3021 smycols += ourlens[i]; 3022 svals += ourlens[i]; 3023 jj++; 3024 } 3025 3026 /* read in other processors and ship out */ 3027 for (i=1; i<size; i++) { 3028 nz = procsnz[i]; 3029 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3030 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3031 } 3032 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3033 } else { 3034 /* receive numeric values */ 3035 ierr = 
PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3036 3037 /* receive message of values*/ 3038 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3039 3040 /* insert into matrix */ 3041 jj = rstart; 3042 smycols = mycols; 3043 svals = vals; 3044 for (i=0; i<m; i++) { 3045 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3046 smycols += ourlens[i]; 3047 svals += ourlens[i]; 3048 jj++; 3049 } 3050 } 3051 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3052 ierr = PetscFree(vals);CHKERRQ(ierr); 3053 ierr = PetscFree(mycols);CHKERRQ(ierr); 3054 ierr = PetscFree(rowners);CHKERRQ(ierr); 3055 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 PetscFunctionReturn(0); 3058 } 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3062 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3063 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3064 { 3065 PetscErrorCode ierr; 3066 IS iscol_local; 3067 PetscInt csize; 3068 3069 PetscFunctionBegin; 3070 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3071 if (call == MAT_REUSE_MATRIX) { 3072 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3073 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3074 } else { 3075 /* check if we are grabbing all columns*/ 3076 PetscBool isstride; 3077 PetscMPIInt lisstride = 0,gisstride; 3078 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3079 if (isstride) { 3080 PetscInt start,len,mstart,mlen; 3081 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3082 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3083 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3084 if (mstart == start && mlen-mstart == len) lisstride = 1; 3085 } 3086 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3087 if (gisstride) { 3088 PetscInt N; 3089 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3090 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3091 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3092 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3093 } else { 3094 PetscInt cbs; 3095 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3096 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3097 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3098 } 3099 } 3100 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3101 if (call == MAT_INITIAL_MATRIX) { 3102 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3103 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3104 } 3105 PetscFunctionReturn(0); 3106 } 3107 3108 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3109 #undef __FUNCT__ 3110 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3111 /* 3112 Not great since it makes two copies of the submatrix, first an SeqAIJ 3113 in local and then by concatenating the local matrices the end result. 
3114 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3115 3116 Note: This requires a sequential iscol with all indices. 3117 */ 3118 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3119 { 3120 PetscErrorCode ierr; 3121 PetscMPIInt rank,size; 3122 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3123 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3124 PetscBool allcolumns, colflag; 3125 Mat M,Mreuse; 3126 MatScalar *vwork,*aa; 3127 MPI_Comm comm; 3128 Mat_SeqAIJ *aij; 3129 3130 PetscFunctionBegin; 3131 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3134 3135 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3136 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3137 if (colflag && ncol == mat->cmap->N) { 3138 allcolumns = PETSC_TRUE; 3139 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3140 } else { 3141 allcolumns = PETSC_FALSE; 3142 } 3143 if (call == MAT_REUSE_MATRIX) { 3144 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3145 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3146 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3147 } else { 3148 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3149 } 3150 3151 /* 3152 m - number of local rows 3153 n - number of columns (same on all processors) 3154 rstart - first row in new global matrix generated 3155 */ 3156 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3157 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3158 if (call == MAT_INITIAL_MATRIX) { 3159 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3160 ii = aij->i; 3161 jj = aij->j; 3162 3163 /* 3164 Determine the number of non-zeros in the diagonal and off-diagonal 3165 portions of the matrix in order to do correct preallocation 3166 */ 3167 3168 /* first get start and end of "diagonal" columns */ 3169 if (csize == PETSC_DECIDE) { 3170 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3171 if (mglobal == n) { /* square matrix */ 3172 nlocal = m; 3173 } else { 3174 nlocal = n/size + ((n % size) > rank); 3175 } 3176 } else { 3177 nlocal = csize; 3178 } 3179 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3180 rstart = rend - nlocal; 3181 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3182 3183 /* next, compute all the lengths */ 3184 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3185 olens = dlens + m; 3186 for (i=0; i<m; i++) { 3187 jend = ii[i+1] - ii[i]; 3188 olen = 0; 3189 dlen = 0; 3190 for (j=0; j<jend; j++) { 3191 if (*jj < rstart || *jj >= rend) olen++; 3192 else dlen++; 3193 jj++; 3194 } 3195 olens[i] = olen; 3196 dlens[i] = dlen; 3197 } 3198 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3199 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3200 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3201 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3202 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3203 ierr = PetscFree(dlens);CHKERRQ(ierr); 3204 } else { 3205 PetscInt ml,nl; 3206 3207 M = *newmat; 3208 
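  /* Reuse path: verify that the previously returned matrix still has the requested local
     layout, then zero it so the fresh values gathered in Mreuse can be inserted below. */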
  ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
  if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
  ierr = MatZeroEntries(M);CHKERRQ(ierr);
  /*
     The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
     rather than the slower MatSetValues().
  */
  M->was_assembled = PETSC_TRUE;
  M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3298 } 3299 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3300 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 3302 if (!v) { 3303 ierr = PetscFree(values);CHKERRQ(ierr); 3304 } 3305 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3306 PetscFunctionReturn(0); 3307 } 3308 3309 #undef __FUNCT__ 3310 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3311 /*@ 3312 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3313 (the default parallel PETSc format). 3314 3315 Collective on MPI_Comm 3316 3317 Input Parameters: 3318 + B - the matrix 3319 . i - the indices into j for the start of each local row (starts with zero) 3320 . j - the column indices for each local row (starts with zero) 3321 - v - optional values in the matrix 3322 3323 Level: developer 3324 3325 Notes: 3326 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3327 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3328 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3329 3330 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3331 3332 The format which is used for the sparse matrix input, is equivalent to a 3333 row-major ordering.. i.e for the following matrix, the input data expected is 3334 as shown 3335 3336 $ 1 0 0 3337 $ 2 0 3 P0 3338 $ ------- 3339 $ 4 5 6 P1 3340 $ 3341 $ Process0 [P0]: rows_owned=[0,1] 3342 $ i = {0,1,3} [size = nrow+1 = 2+1] 3343 $ j = {0,0,2} [size = 3] 3344 $ v = {1,2,3} [size = 3] 3345 $ 3346 $ Process1 [P1]: rows_owned=[2] 3347 $ i = {0,3} [size = nrow+1 = 1+1] 3348 $ j = {0,1,2} [size = 3] 3349 $ v = {4,5,6} [size = 3] 3350 3351 .keywords: matrix, aij, compressed row, sparse, parallel 3352 3353 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3354 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3355 @*/ 3356 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3357 { 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3362 PetscFunctionReturn(0); 3363 } 3364 3365 #undef __FUNCT__ 3366 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3367 /*@C 3368 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3369 (the default parallel PETSc format). For good matrix assembly performance 3370 the user should preallocate the matrix storage by setting the parameters 3371 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3372 performance can be increased by more than a factor of 50. 3373 3374 Collective on MPI_Comm 3375 3376 Input Parameters: 3377 + B - the matrix 3378 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3379 (same value is used for all local rows) 3380 . d_nnz - array containing the number of nonzeros in the various rows of the 3381 DIAGONAL portion of the local submatrix (possibly different for each row) 3382 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3383 The size of this array is equal to the number of local rows, i.e 'm'. 
3384 For matrices that will be factored, you must leave room for (and set) 3385 the diagonal entry even if it is zero. 3386 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3387 submatrix (same value is used for all local rows). 3388 - o_nnz - array containing the number of nonzeros in the various rows of the 3389 OFF-DIAGONAL portion of the local submatrix (possibly different for 3390 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3391 structure. The size of this array is equal to the number 3392 of local rows, i.e 'm'. 3393 3394 If the *_nnz parameter is given then the *_nz parameter is ignored 3395 3396 The AIJ format (also called the Yale sparse matrix format or 3397 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3398 storage. The stored row and column indices begin with zero. 3399 See Users-Manual: ch_mat for details. 3400 3401 The parallel matrix is partitioned such that the first m0 rows belong to 3402 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3403 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3404 3405 The DIAGONAL portion of the local submatrix of a processor can be defined 3406 as the submatrix which is obtained by extraction the part corresponding to 3407 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3408 first row that belongs to the processor, r2 is the last row belonging to 3409 the this processor, and c1-c2 is range of indices of the local part of a 3410 vector suitable for applying the matrix to. This is an mxn matrix. In the 3411 common case of a square matrix, the row and column ranges are the same and 3412 the DIAGONAL part is also square. The remaining portion of the local 3413 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3414 3415 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3416 3417 You can call MatGetInfo() to get information on how effective the preallocation was; 3418 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3419 You can also run with the option -info and look for messages with the string 3420 malloc in them to see if additional memory allocation was needed. 3421 3422 Example usage: 3423 3424 Consider the following 8x8 matrix with 34 non-zero values, that is 3425 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3426 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3427 as follows: 3428 3429 .vb 3430 1 2 0 | 0 3 0 | 0 4 3431 Proc0 0 5 6 | 7 0 0 | 8 0 3432 9 0 10 | 11 0 0 | 12 0 3433 ------------------------------------- 3434 13 0 14 | 15 16 17 | 0 0 3435 Proc1 0 18 0 | 19 20 21 | 0 0 3436 0 0 0 | 22 23 0 | 24 0 3437 ------------------------------------- 3438 Proc2 25 26 27 | 0 0 28 | 29 0 3439 30 0 0 | 31 32 33 | 0 34 3440 .ve 3441 3442 This can be represented as a collection of submatrices as: 3443 3444 .vb 3445 A B C 3446 D E F 3447 G H I 3448 .ve 3449 3450 Where the submatrices A,B,C are owned by proc0, D,E,F are 3451 owned by proc1, G,H,I are owned by proc2. 3452 3453 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3454 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3455 The 'M','N' parameters are 8,8, and have the same values on all procs. 3456 3457 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3458 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3459 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - Creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
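   As an illustration, a minimal call for the two-process layout shown further below (array
   contents as listed there; the names i0, j0, v0 are illustrative and declarations plus error
   checking are abridged) might look like the following sketch on process 0:

.vb
     PetscInt    i0[] = {0,1,3},  j0[] = {0,0,2};
     PetscScalar v0[] = {1.0,2.0,3.0};
     Mat         A;

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i0,j0,v0,&A);
.ve
   Process 1 would make the same call with its own m = 1 and arrays i = {0,3}, j = {0,1,2},
   v = {4,5,6}; every process must participate since the routine is collective.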
3540 3541 The format which is used for the sparse matrix input, is equivalent to a 3542 row-major ordering.. i.e for the following matrix, the input data expected is 3543 as shown 3544 3545 $ 1 0 0 3546 $ 2 0 3 P0 3547 $ ------- 3548 $ 4 5 6 P1 3549 $ 3550 $ Process0 [P0]: rows_owned=[0,1] 3551 $ i = {0,1,3} [size = nrow+1 = 2+1] 3552 $ j = {0,0,2} [size = 3] 3553 $ v = {1,2,3} [size = 3] 3554 $ 3555 $ Process1 [P1]: rows_owned=[2] 3556 $ i = {0,3} [size = nrow+1 = 1+1] 3557 $ j = {0,1,2} [size = 3] 3558 $ v = {4,5,6} [size = 3] 3559 3560 .keywords: matrix, aij, compressed row, sparse, parallel 3561 3562 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3563 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3564 @*/ 3565 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3566 { 3567 PetscErrorCode ierr; 3568 3569 PetscFunctionBegin; 3570 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3571 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3572 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3573 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3574 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3575 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3576 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3577 PetscFunctionReturn(0); 3578 } 3579 3580 #undef __FUNCT__ 3581 #define __FUNCT__ "MatCreateAIJ" 3582 /*@C 3583 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3584 (the default parallel PETSc format). For good matrix assembly performance 3585 the user should preallocate the matrix storage by setting the parameters 3586 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3587 performance can be increased by more than a factor of 50. 3588 3589 Collective on MPI_Comm 3590 3591 Input Parameters: 3592 + comm - MPI communicator 3593 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3594 This value should be the same as the local size used in creating the 3595 y vector for the matrix-vector product y = Ax. 3596 . n - This value should be the same as the local size used in creating the 3597 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3598 calculated if N is given) For square matrices n is almost always m. 3599 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3600 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3601 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3602 (same value is used for all local rows) 3603 . d_nnz - array containing the number of nonzeros in the various rows of the 3604 DIAGONAL portion of the local submatrix (possibly different for each row) 3605 or NULL, if d_nz is used to specify the nonzero structure. 3606 The size of this array is equal to the number of local rows, i.e 'm'. 3607 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3608 submatrix (same value is used for all local rows). 3609 - o_nnz - array containing the number of nonzeros in the various rows of the 3610 OFF-DIAGONAL portion of the local submatrix (possibly different for 3611 each row) or NULL, if o_nz is used to specify the nonzero 3612 structure. 
The size of this array is equal to the number 3613 of local rows, i.e 'm'. 3614 3615 Output Parameter: 3616 . A - the matrix 3617 3618 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3619 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3620 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3621 3622 Notes: 3623 If the *_nnz parameter is given then the *_nz parameter is ignored 3624 3625 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3626 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3627 storage requirements for this matrix. 3628 3629 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3630 processor than it must be used on all processors that share the object for 3631 that argument. 3632 3633 The user MUST specify either the local or global matrix dimensions 3634 (possibly both). 3635 3636 The parallel matrix is partitioned across processors such that the 3637 first m0 rows belong to process 0, the next m1 rows belong to 3638 process 1, the next m2 rows belong to process 2 etc.. where 3639 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3640 values corresponding to [m x N] submatrix. 3641 3642 The columns are logically partitioned with the n0 columns belonging 3643 to 0th partition, the next n1 columns belonging to the next 3644 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3645 3646 The DIAGONAL portion of the local submatrix on any given processor 3647 is the submatrix corresponding to the rows and columns m,n 3648 corresponding to the given processor. i.e diagonal matrix on 3649 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3650 etc. The remaining portion of the local submatrix [m x (N-n)] 3651 constitute the OFF-DIAGONAL portion. The example below better 3652 illustrates this concept. 3653 3654 For a square global matrix we define each processor's diagonal portion 3655 to be its local rows and the corresponding columns (a square submatrix); 3656 each processor's off-diagonal portion encompasses the remainder of the 3657 local matrix (a rectangular submatrix). 3658 3659 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3660 3661 When calling this routine with a single process communicator, a matrix of 3662 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3663 type of communicator, use the construction mechanism: 3664 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3665 3666 By default, this format uses inodes (identical nodes) when possible. 3667 We search for consecutive rows with the same nonzero structure, thereby 3668 reusing matrix information to achieve increased efficiency. 3669 3670 Options Database Keys: 3671 + -mat_no_inode - Do not use inodes 3672 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3673 - -mat_aij_oneindex - Internally use indexing starting at 1 3674 rather than 0. Note that when calling MatSetValues(), 3675 the user still MUST index entries starting at 0! 3676 3677 3678 Example usage: 3679 3680 Consider the following 8x8 matrix with 34 non-zero values, that is 3681 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3682 proc1 owns 3 rows, proc2 owns 2 rows. 
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
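   For instance, with the per-row counts above, process 0 might create its share of this matrix
   with a call along the following lines (a sketch only; declarations and error checking are
   omitted, and each process passes its own m, n, d_nnz and o_nnz):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   Equivalently, and as recommended above, one can call MatCreate(), MatSetSizes(), MatSetType()
   and MatMPIAIJSetPreallocation() with the same sizes and per-row counts.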
3746 3747 Level: intermediate 3748 3749 .keywords: matrix, aij, compressed row, sparse, parallel 3750 3751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3752 MPIAIJ, MatCreateMPIAIJWithArrays() 3753 @*/ 3754 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3755 { 3756 PetscErrorCode ierr; 3757 PetscMPIInt size; 3758 3759 PetscFunctionBegin; 3760 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3761 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3762 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3763 if (size > 1) { 3764 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3765 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3766 } else { 3767 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3768 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3769 } 3770 PetscFunctionReturn(0); 3771 } 3772 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3775 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3776 { 3777 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3778 PetscBool flg; 3779 PetscErrorCode ierr; 3780 3781 PetscFunctionBegin; 3782 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3783 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input"); 3784 if (Ad) *Ad = a->A; 3785 if (Ao) *Ao = a->B; 3786 if (colmap) *colmap = a->garray; 3787 PetscFunctionReturn(0); 3788 } 3789 3790 #undef __FUNCT__ 3791 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3792 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3793 { 3794 PetscErrorCode ierr; 3795 PetscInt i; 3796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3797 3798 PetscFunctionBegin; 3799 if (coloring->ctype == IS_COLORING_GLOBAL) { 3800 ISColoringValue *allcolors,*colors; 3801 ISColoring ocoloring; 3802 3803 /* set coloring for diagonal portion */ 3804 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3805 3806 /* set coloring for off-diagonal portion */ 3807 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3808 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3809 for (i=0; i<a->B->cmap->n; i++) { 3810 colors[i] = allcolors[a->garray[i]]; 3811 } 3812 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3813 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3814 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3815 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3816 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3817 ISColoringValue *colors; 3818 PetscInt *larray; 3819 ISColoring ocoloring; 3820 3821 /* set coloring for diagonal portion */ 3822 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3823 for (i=0; i<a->A->cmap->n; i++) { 3824 larray[i] = i + A->cmap->rstart; 3825 } 3826 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3827 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3828 for (i=0; i<a->A->cmap->n; i++) { 3829 colors[i] = coloring->colors[larray[i]]; 3830 } 3831 ierr = PetscFree(larray);CHKERRQ(ierr); 3832 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3833 ierr = 
MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3834 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3835 3836 /* set coloring for off-diagonal portion */ 3837 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3838 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3839 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3840 for (i=0; i<a->B->cmap->n; i++) { 3841 colors[i] = coloring->colors[larray[i]]; 3842 } 3843 ierr = PetscFree(larray);CHKERRQ(ierr); 3844 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3845 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3846 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3847 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3848 PetscFunctionReturn(0); 3849 } 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3854 { 3855 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3856 PetscErrorCode ierr; 3857 3858 PetscFunctionBegin; 3859 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3860 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3861 PetscFunctionReturn(0); 3862 } 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3866 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3867 { 3868 PetscErrorCode ierr; 3869 PetscInt m,N,i,rstart,nnz,Ii; 3870 PetscInt *indx; 3871 PetscScalar *values; 3872 3873 PetscFunctionBegin; 3874 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3875 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3876 PetscInt *dnz,*onz,sum,bs,cbs; 3877 3878 if (n == PETSC_DECIDE) { 3879 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3880 } 3881 /* Check sum(n) = N */ 3882 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3883 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3884 3885 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3886 rstart -= m; 3887 3888 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3889 for (i=0; i<m; i++) { 3890 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3891 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3892 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3893 } 3894 3895 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3896 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3897 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3898 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3899 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3900 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3901 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3902 } 3903 3904 /* numeric phase */ 3905 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3906 for (i=0; i<m; i++) { 3907 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3908 Ii = i + rstart; 3909 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3910 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3911 } 3912 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3913 ierr = 
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3914 PetscFunctionReturn(0); 3915 } 3916 3917 #undef __FUNCT__ 3918 #define __FUNCT__ "MatFileSplit" 3919 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3920 { 3921 PetscErrorCode ierr; 3922 PetscMPIInt rank; 3923 PetscInt m,N,i,rstart,nnz; 3924 size_t len; 3925 const PetscInt *indx; 3926 PetscViewer out; 3927 char *name; 3928 Mat B; 3929 const PetscScalar *values; 3930 3931 PetscFunctionBegin; 3932 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3933 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3934 /* Should this be the type of the diagonal block of A? */ 3935 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3936 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3937 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3938 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3939 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3940 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3941 for (i=0; i<m; i++) { 3942 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3943 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3944 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3945 } 3946 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3947 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3948 3949 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3950 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3951 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3952 sprintf(name,"%s.%d",outfile,rank); 3953 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3954 ierr = PetscFree(name);CHKERRQ(ierr); 3955 ierr = MatView(B,out);CHKERRQ(ierr); 3956 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3957 ierr = MatDestroy(&B);CHKERRQ(ierr); 3958 PetscFunctionReturn(0); 3959 } 3960 3961 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3962 #undef __FUNCT__ 3963 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3964 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3965 { 3966 PetscErrorCode ierr; 3967 Mat_Merge_SeqsToMPI *merge; 3968 PetscContainer container; 3969 3970 PetscFunctionBegin; 3971 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3972 if (container) { 3973 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3974 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3975 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3976 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3977 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3978 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3979 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3980 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3981 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3982 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3983 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3984 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3985 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3986 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3987 ierr = PetscFree(merge);CHKERRQ(ierr); 3988 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3989 } 3990 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3991 PetscFunctionReturn(0); 3992 } 3993 3994 #include <../src/mat/utils/freespace.h> 3995 #include <petscbt.h> 3996 3997 #undef __FUNCT__ 3998 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3999 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4000 { 4001 
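  /* Numeric phase of the sequential-to-MPI merge: using the communication pattern stored in the
     "MatMergeSeqsToMPI" container by the symbolic phase, each process sends the numerical values
     of the rows it does not own, accumulates its own and the received values row by row, and
     inserts them into the preallocated parallel matrix with MatSetValues(). */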
PetscErrorCode ierr; 4002 MPI_Comm comm; 4003 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4004 PetscMPIInt size,rank,taga,*len_s; 4005 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4006 PetscInt proc,m; 4007 PetscInt **buf_ri,**buf_rj; 4008 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4009 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4010 MPI_Request *s_waits,*r_waits; 4011 MPI_Status *status; 4012 MatScalar *aa=a->a; 4013 MatScalar **abuf_r,*ba_i; 4014 Mat_Merge_SeqsToMPI *merge; 4015 PetscContainer container; 4016 4017 PetscFunctionBegin; 4018 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4019 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4020 4021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4022 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4023 4024 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4025 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4026 4027 bi = merge->bi; 4028 bj = merge->bj; 4029 buf_ri = merge->buf_ri; 4030 buf_rj = merge->buf_rj; 4031 4032 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4033 owners = merge->rowmap->range; 4034 len_s = merge->len_s; 4035 4036 /* send and recv matrix values */ 4037 /*-----------------------------*/ 4038 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4039 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4040 4041 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4042 for (proc=0,k=0; proc<size; proc++) { 4043 if (!len_s[proc]) continue; 4044 i = owners[proc]; 4045 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4046 k++; 4047 } 4048 4049 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4050 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4051 ierr = PetscFree(status);CHKERRQ(ierr); 4052 4053 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4054 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4055 4056 /* insert mat values of mpimat */ 4057 /*----------------------------*/ 4058 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4059 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4060 4061 for (k=0; k<merge->nrecv; k++) { 4062 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4063 nrows = *(buf_ri_k[k]); 4064 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4065 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4066 } 4067 4068 /* set values of ba */ 4069 m = merge->rowmap->n; 4070 for (i=0; i<m; i++) { 4071 arow = owners[rank] + i; 4072 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4073 bnzi = bi[i+1] - bi[i]; 4074 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4075 4076 /* add local non-zero vals of this proc's seqmat into ba */ 4077 anzi = ai[arow+1] - ai[arow]; 4078 aj = a->j + ai[arow]; 4079 aa = a->a + ai[arow]; 4080 nextaj = 0; 4081 for (j=0; nextaj<anzi; j++) { 4082 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4083 ba_i[j] += aa[nextaj++]; 4084 } 4085 } 4086 4087 /* add received vals into ba */ 4088 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4089 /* i-th row */ 4090 if (i == *nextrow[k]) { 4091 anzi = *(nextai[k]+1) - *nextai[k]; 4092 aj = buf_rj[k] + *(nextai[k]); 4093 aa = abuf_r[k] + 
*(nextai[k]); 4094 nextaj = 0; 4095 for (j=0; nextaj<anzi; j++) { 4096 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4097 ba_i[j] += aa[nextaj++]; 4098 } 4099 } 4100 nextrow[k]++; nextai[k]++; 4101 } 4102 } 4103 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4104 } 4105 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4106 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4107 4108 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4109 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4110 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4111 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4112 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4113 PetscFunctionReturn(0); 4114 } 4115 4116 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4117 4118 #undef __FUNCT__ 4119 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4120 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4121 { 4122 PetscErrorCode ierr; 4123 Mat B_mpi; 4124 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4125 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4126 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4127 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4128 PetscInt len,proc,*dnz,*onz,bs,cbs; 4129 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4130 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4131 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4132 MPI_Status *status; 4133 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4134 PetscBT lnkbt; 4135 Mat_Merge_SeqsToMPI *merge; 4136 PetscContainer container; 4137 4138 PetscFunctionBegin; 4139 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4140 4141 /* make sure it is a PETSc comm */ 4142 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4143 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4144 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4145 4146 ierr = PetscNew(&merge);CHKERRQ(ierr); 4147 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4148 4149 /* determine row ownership */ 4150 /*---------------------------------------------------------*/ 4151 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4152 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4153 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4154 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4155 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4156 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4157 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4158 4159 m = merge->rowmap->n; 4160 owners = merge->rowmap->range; 4161 4162 /* determine the number of messages to send, their lengths */ 4163 /*---------------------------------------------------------*/ 4164 len_s = merge->len_s; 4165 4166 len = 0; /* length of buf_si[] */ 4167 merge->nsend = 0; 4168 for (proc=0; proc<size; proc++) { 4169 len_si[proc] = 0; 4170 if (proc == rank) { 4171 len_s[proc] = 0; 4172 } else { 4173 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4174 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4175 } 4176 if (len_s[proc]) { 4177 merge->nsend++; 4178 nrows = 0; 4179 for (i=owners[proc]; i<owners[proc+1]; i++) { 4180 if (ai[i+1] > ai[i]) nrows++; 4181 } 4182 len_si[proc] = 2*(nrows+1); 4183 len += len_si[proc]; 4184 } 4185 } 4186 4187 /* determine the number and length of messages to receive for 
ij-structure */ 4188 /*-------------------------------------------------------------------------*/ 4189 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4190 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4191 4192 /* post the Irecv of j-structure */ 4193 /*-------------------------------*/ 4194 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4195 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4196 4197 /* post the Isend of j-structure */ 4198 /*--------------------------------*/ 4199 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4200 4201 for (proc=0, k=0; proc<size; proc++) { 4202 if (!len_s[proc]) continue; 4203 i = owners[proc]; 4204 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4205 k++; 4206 } 4207 4208 /* receives and sends of j-structure are complete */ 4209 /*------------------------------------------------*/ 4210 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4211 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4212 4213 /* send and recv i-structure */ 4214 /*---------------------------*/ 4215 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4216 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4217 4218 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4219 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4220 for (proc=0,k=0; proc<size; proc++) { 4221 if (!len_s[proc]) continue; 4222 /* form outgoing message for i-structure: 4223 buf_si[0]: nrows to be sent 4224 [1:nrows]: row index (global) 4225 [nrows+1:2*nrows+1]: i-structure index 4226 */ 4227 /*-------------------------------------------*/ 4228 nrows = len_si[proc]/2 - 1; 4229 buf_si_i = buf_si + nrows+1; 4230 buf_si[0] = nrows; 4231 buf_si_i[0] = 0; 4232 nrows = 0; 4233 for (i=owners[proc]; i<owners[proc+1]; i++) { 4234 anzi = ai[i+1] - ai[i]; 4235 if (anzi) { 4236 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4237 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4238 nrows++; 4239 } 4240 } 4241 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4242 k++; 4243 buf_si += len_si[proc]; 4244 } 4245 4246 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4247 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4248 4249 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4250 for (i=0; i<merge->nrecv; i++) { 4251 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4252 } 4253 4254 ierr = PetscFree(len_si);CHKERRQ(ierr); 4255 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4256 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4257 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4258 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4259 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4260 ierr = PetscFree(status);CHKERRQ(ierr); 4261 4262 /* compute a local seq matrix in each processor */ 4263 /*----------------------------------------------*/ 4264 /* allocate bi array and free space for accumulating nonzero column info */ 4265 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4266 bi[0] = 0; 4267 4268 /* create and initialize a 
linked list */ 4269 nlnk = N+1; 4270 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4271 4272 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4273 len = ai[owners[rank+1]] - ai[owners[rank]]; 4274 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4275 4276 current_space = free_space; 4277 4278 /* determine symbolic info for each local row */ 4279 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4280 4281 for (k=0; k<merge->nrecv; k++) { 4282 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4283 nrows = *buf_ri_k[k]; 4284 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4285 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4286 } 4287 4288 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4289 len = 0; 4290 for (i=0; i<m; i++) { 4291 bnzi = 0; 4292 /* add local non-zero cols of this proc's seqmat into lnk */ 4293 arow = owners[rank] + i; 4294 anzi = ai[arow+1] - ai[arow]; 4295 aj = a->j + ai[arow]; 4296 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4297 bnzi += nlnk; 4298 /* add received col data into lnk */ 4299 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4300 if (i == *nextrow[k]) { /* i-th row */ 4301 anzi = *(nextai[k]+1) - *nextai[k]; 4302 aj = buf_rj[k] + *nextai[k]; 4303 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4304 bnzi += nlnk; 4305 nextrow[k]++; nextai[k]++; 4306 } 4307 } 4308 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4309 4310 /* if free space is not available, make more free space */ 4311 if (current_space->local_remaining<bnzi) { 4312 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4313 nspacedouble++; 4314 } 4315 /* copy data into free space, then initialize lnk */ 4316 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4317 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4318 4319 current_space->array += bnzi; 4320 current_space->local_used += bnzi; 4321 current_space->local_remaining -= bnzi; 4322 4323 bi[i+1] = bi[i] + bnzi; 4324 } 4325 4326 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4327 4328 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4329 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4330 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4331 4332 /* create symbolic parallel matrix B_mpi */ 4333 /*---------------------------------------*/ 4334 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4335 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4336 if (n==PETSC_DECIDE) { 4337 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4338 } else { 4339 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4340 } 4341 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4342 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4343 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4344 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4345 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4346 4347 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4348 B_mpi->assembled = PETSC_FALSE; 4349 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4350 merge->bi = bi; 4351 merge->bj = bj; 4352 merge->buf_ri = 
buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix on each process
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
4436 .    A_loc - the local sequential matrix generated
4437
4438     Level: developer
4439
4440 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4441
4442 @*/
4443 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4444 {
4445   PetscErrorCode ierr;
4446   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4447   Mat_SeqAIJ     *mat,*a,*b;
4448   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4449   MatScalar      *aa,*ba,*cam;
4450   PetscScalar    *ca;
4451   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4452   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4453   PetscBool      match;
4454   MPI_Comm       comm;
4455   PetscMPIInt    size;
4456
4457   PetscFunctionBegin;
4458   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4459   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4460   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4461   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4462   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4463
4464   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4465   a  = (Mat_SeqAIJ*)(mpimat->A)->data;
4466   b  = (Mat_SeqAIJ*)(mpimat->B)->data;
4467   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4468   aa = a->a; ba = b->a;
4469   if (scall == MAT_INITIAL_MATRIX) {
4470     if (size == 1) {
4471       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4472       PetscFunctionReturn(0);
4473     }
4474
4475     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4476     ci[0] = 0;
4477     for (i=0; i<am; i++) {
4478       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4479     }
4480     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4481     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4482     k    = 0;
4483     for (i=0; i<am; i++) {
4484       ncols_o = bi[i+1] - bi[i];
4485       ncols_d = ai[i+1] - ai[i];
4486       /* off-diagonal portion of A */
4487       for (jo=0; jo<ncols_o; jo++) {
4488         col = cmap[*bj];
4489         if (col >= cstart) break;
4490         cj[k]   = col; bj++;
4491         ca[k++] = *ba++;
4492       }
4493       /* diagonal portion of A */
4494       for (j=0; j<ncols_d; j++) {
4495         cj[k]   = cstart + *aj++;
4496         ca[k++] = *aa++;
4497       }
4498       /* off-diagonal portion of A */
4499       for (j=jo; j<ncols_o; j++) {
4500         cj[k]   = cmap[*bj++];
4501         ca[k++] = *ba++;
4502       }
4503     }
4504     /* put together the new matrix */
4505     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4506     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4507     /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4508 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4509 mat->free_a = PETSC_TRUE; 4510 mat->free_ij = PETSC_TRUE; 4511 mat->nonew = 0; 4512 } else if (scall == MAT_REUSE_MATRIX) { 4513 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4514 ci = mat->i; cj = mat->j; cam = mat->a; 4515 for (i=0; i<am; i++) { 4516 /* off-diagonal portion of A */ 4517 ncols_o = bi[i+1] - bi[i]; 4518 for (jo=0; jo<ncols_o; jo++) { 4519 col = cmap[*bj]; 4520 if (col >= cstart) break; 4521 *cam++ = *ba++; bj++; 4522 } 4523 /* diagonal portion of A */ 4524 ncols_d = ai[i+1] - ai[i]; 4525 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4526 /* off-diagonal portion of A */ 4527 for (j=jo; j<ncols_o; j++) { 4528 *cam++ = *ba++; bj++; 4529 } 4530 } 4531 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4532 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4533 PetscFunctionReturn(0); 4534 } 4535 4536 #undef __FUNCT__ 4537 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4538 /*@C 4539 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4540 4541 Not Collective 4542 4543 Input Parameters: 4544 + A - the matrix 4545 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4546 - row, col - index sets of rows and columns to extract (or NULL) 4547 4548 Output Parameter: 4549 . A_loc - the local sequential matrix generated 4550 4551 Level: developer 4552 4553 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4554 4555 @*/ 4556 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4557 { 4558 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4559 PetscErrorCode ierr; 4560 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4561 IS isrowa,iscola; 4562 Mat *aloc; 4563 PetscBool match; 4564 4565 PetscFunctionBegin; 4566 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4567 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4568 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4569 if (!row) { 4570 start = A->rmap->rstart; end = A->rmap->rend; 4571 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4572 } else { 4573 isrowa = *row; 4574 } 4575 if (!col) { 4576 start = A->cmap->rstart; 4577 cmap = a->garray; 4578 nzA = a->A->cmap->n; 4579 nzB = a->B->cmap->n; 4580 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4581 ncols = 0; 4582 for (i=0; i<nzB; i++) { 4583 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4584 else break; 4585 } 4586 imark = i; 4587 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4588 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4589 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4590 } else { 4591 iscola = *col; 4592 } 4593 if (scall != MAT_INITIAL_MATRIX) { 4594 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4595 aloc[0] = *A_loc; 4596 } 4597 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4598 *A_loc = aloc[0]; 4599 ierr = PetscFree(aloc);CHKERRQ(ierr); 4600 if (!row) { 4601 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4602 } 4603 if (!col) { 4604 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4605 } 4606 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4607 PetscFunctionReturn(0); 4608 } 4609 4610 #undef __FUNCT__ 4611 #define __FUNCT__ "MatGetBrowsOfAcols" 4612 /*@C 4613 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero 
columns of local A 4614 4615 Collective on Mat 4616 4617 Input Parameters: 4618 + A,B - the matrices in mpiaij format 4619 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4620 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4621 4622 Output Parameter: 4623 + rowb, colb - index sets of rows and columns of B to extract 4624 - B_seq - the sequential matrix generated 4625 4626 Level: developer 4627 4628 @*/ 4629 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4630 { 4631 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4632 PetscErrorCode ierr; 4633 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4634 IS isrowb,iscolb; 4635 Mat *bseq=NULL; 4636 4637 PetscFunctionBegin; 4638 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4639 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4640 } 4641 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4642 4643 if (scall == MAT_INITIAL_MATRIX) { 4644 start = A->cmap->rstart; 4645 cmap = a->garray; 4646 nzA = a->A->cmap->n; 4647 nzB = a->B->cmap->n; 4648 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4649 ncols = 0; 4650 for (i=0; i<nzB; i++) { /* row < local row index */ 4651 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4652 else break; 4653 } 4654 imark = i; 4655 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4656 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4657 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4658 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4659 } else { 4660 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4661 isrowb = *rowb; iscolb = *colb; 4662 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4663 bseq[0] = *B_seq; 4664 } 4665 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4666 *B_seq = bseq[0]; 4667 ierr = PetscFree(bseq);CHKERRQ(ierr); 4668 if (!rowb) { 4669 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4670 } else { 4671 *rowb = isrowb; 4672 } 4673 if (!colb) { 4674 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4675 } else { 4676 *colb = iscolb; 4677 } 4678 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4679 PetscFunctionReturn(0); 4680 } 4681 4682 #undef __FUNCT__ 4683 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4684 /* 4685 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4686 of the OFF-DIAGONAL portion of local A 4687 4688 Collective on Mat 4689 4690 Input Parameters: 4691 + A,B - the matrices in mpiaij format 4692 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4693 4694 Output Parameter: 4695 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4696 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4697 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4698 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4699 4700 Level: developer 4701 4702 */ 4703 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4704 { 4705 VecScatter_MPI_General *gen_to,*gen_from; 4706 PetscErrorCode ierr; 4707 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4708 Mat_SeqAIJ *b_oth; 4709 VecScatter ctx =a->Mvctx; 4710 MPI_Comm comm; 4711 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4712 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4713 PetscScalar *rvalues,*svalues; 4714 MatScalar *b_otha,*bufa,*bufA; 4715 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4716 MPI_Request *rwaits = NULL,*swaits = NULL; 4717 MPI_Status *sstatus,rstatus; 4718 PetscMPIInt jj,size; 4719 PetscInt *cols,sbs,rbs; 4720 PetscScalar *vals; 4721 4722 PetscFunctionBegin; 4723 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4725 4726 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4727 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4728 } 4729 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4730 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4731 4732 gen_to = (VecScatter_MPI_General*)ctx->todata; 4733 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4734 rvalues = gen_from->values; /* holds the length of receiving row */ 4735 svalues = gen_to->values; /* holds the length of sending row */ 4736 nrecvs = gen_from->n; 4737 nsends = gen_to->n; 4738 4739 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4740 srow = gen_to->indices; /* local row index to be sent */ 4741 sstarts = gen_to->starts; 4742 sprocs = gen_to->procs; 4743 sstatus = gen_to->sstatus; 4744 sbs = gen_to->bs; 4745 rstarts = gen_from->starts; 4746 rprocs = gen_from->procs; 4747 rbs = gen_from->bs; 4748 4749 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4750 if (scall == MAT_INITIAL_MATRIX) { 4751 /* i-array */ 4752 /*---------*/ 4753 /* post receives */ 4754 for (i=0; i<nrecvs; i++) { 4755 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4756 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4757 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4758 } 4759 4760 /* pack the outgoing message */ 4761 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4762 4763 sstartsj[0] = 0; 4764 rstartsj[0] = 0; 4765 len = 0; /* total length of j or a array to be sent */ 4766 k = 0; 4767 for (i=0; i<nsends; i++) { 4768 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4769 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4770 for (j=0; j<nrows; j++) { 4771 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4772 for (l=0; l<sbs; l++) { 4773 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4774 4775 rowlen[j*sbs+l] = ncols; 4776 4777 len += ncols; 4778 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4779 } 4780 k++; 4781 } 4782 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4783 4784 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4785 } 4786 /* recvs and sends of i-array are completed */ 4787 i = nrecvs; 4788 while (i--) { 4789 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4790 } 4791 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4792 4793 /* allocate buffers for sending j and a arrays */ 4794 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4795 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4796 4797 /* create i-array of B_oth */ 4798 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4799 4800 b_othi[0] = 0; 4801 len = 0; /* total length of j or a array to be received */ 4802 k = 0; 4803 for (i=0; i<nrecvs; i++) { 4804 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4805 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4806 for (j=0; j<nrows; j++) { 4807 b_othi[k+1] = b_othi[k] + rowlen[j]; 4808 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4809 k++; 4810 } 4811 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4812 } 4813 4814 /* allocate space for j and a arrrays of B_oth */ 4815 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4816 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4817 4818 /* j-array */ 4819 /*---------*/ 4820 /* post receives of j-array */ 4821 for (i=0; i<nrecvs; i++) { 4822 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4823 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4824 } 4825 4826 /* pack the outgoing message j-array */ 4827 k = 0; 4828 for (i=0; i<nsends; i++) { 4829 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4830 bufJ = bufj+sstartsj[i]; 4831 for (j=0; j<nrows; j++) { 4832 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4833 for (ll=0; ll<sbs; ll++) { 4834 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4835 for (l=0; l<ncols; l++) { 4836 *bufJ++ = cols[l]; 4837 } 4838 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4839 } 4840 } 4841 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4842 } 4843 4844 /* recvs and sends of j-array are completed */ 4845 i = nrecvs; 4846 while (i--) { 4847 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4848 } 4849 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4850 } else if (scall == MAT_REUSE_MATRIX) { 4851 sstartsj = *startsj_s; 4852 rstartsj = *startsj_r; 4853 bufa = *bufa_ptr; 4854 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4855 b_otha = b_oth->a; 4856 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4857 4858 /* a-array */ 4859 /*---------*/ 4860 /* post receives of a-array */ 4861 for (i=0; i<nrecvs; i++) { 4862 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4863 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4864 } 4865 4866 /* pack the outgoing message a-array */ 4867 k = 0; 4868 for (i=0; i<nsends; i++) { 4869 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4870 bufA = bufa+sstartsj[i]; 4871 for (j=0; j<nrows; j++) { 4872 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4873 for (ll=0; ll<sbs; ll++) { 4874 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4875 for (l=0; l<ncols; l++) { 4876 *bufA++ = vals[l]; 4877 } 4878 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4879 } 4880 } 4881 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4882 } 4883 /* recvs and sends of a-array are completed */ 4884 i = nrecvs; 4885 while (i--) { 4886 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4887 } 4888 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4889 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4890 4891 if (scall == MAT_INITIAL_MATRIX) { 4892 /* put together the new matrix */ 4893 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4894 4895 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4896 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4897 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4898 b_oth->free_a = PETSC_TRUE; 4899 b_oth->free_ij = PETSC_TRUE; 4900 b_oth->nonew = 0; 4901 4902 ierr = PetscFree(bufj);CHKERRQ(ierr); 4903 if (!startsj_s || !bufa_ptr) { 4904 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4905 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4906 } else { 4907 *startsj_s = sstartsj; 4908 *startsj_r = rstartsj; 4909 *bufa_ptr = bufa; 4910 } 4911 } 4912 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4913 PetscFunctionReturn(0); 4914 } 4915 4916 #undef __FUNCT__ 4917 #define __FUNCT__ "MatGetCommunicationStructs" 4918 /*@C 4919 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4920 4921 Not Collective 4922 4923 Input Parameters: 4924 . A - The matrix in mpiaij format 4925 4926 Output Parameter: 4927 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4928 . 
colmap - A map from global column index to local index into lvec 4929 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4930 4931 Level: developer 4932 4933 @*/ 4934 #if defined(PETSC_USE_CTABLE) 4935 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4936 #else 4937 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4938 #endif 4939 { 4940 Mat_MPIAIJ *a; 4941 4942 PetscFunctionBegin; 4943 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4944 PetscValidPointer(lvec, 2); 4945 PetscValidPointer(colmap, 3); 4946 PetscValidPointer(multScatter, 4); 4947 a = (Mat_MPIAIJ*) A->data; 4948 if (lvec) *lvec = a->lvec; 4949 if (colmap) *colmap = a->colmap; 4950 if (multScatter) *multScatter = a->Mvctx; 4951 PetscFunctionReturn(0); 4952 } 4953 4954 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4957 #if defined(PETSC_HAVE_ELEMENTAL) 4958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4959 #endif 4960 4961 #undef __FUNCT__ 4962 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4963 /* 4964 Computes (B'*A')' since computing B*A directly is untenable 4965 4966 n p p 4967 ( ) ( ) ( ) 4968 m ( A ) * n ( B ) = m ( C ) 4969 ( ) ( ) ( ) 4970 4971 */ 4972 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4973 { 4974 PetscErrorCode ierr; 4975 Mat At,Bt,Ct; 4976 4977 PetscFunctionBegin; 4978 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4979 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4980 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4981 ierr = MatDestroy(&At);CHKERRQ(ierr); 4982 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4983 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4984 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4985 PetscFunctionReturn(0); 4986 } 4987 4988 #undef __FUNCT__ 4989 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4990 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4991 { 4992 PetscErrorCode ierr; 4993 PetscInt m=A->rmap->n,n=B->cmap->n; 4994 Mat Cmat; 4995 4996 PetscFunctionBegin; 4997 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4998 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4999 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5000 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5001 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5002 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5003 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5004 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5005 5006 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5007 5008 *C = Cmat; 5009 PetscFunctionReturn(0); 5010 } 5011 5012 /* ----------------------------------------------------------------*/ 5013 #undef __FUNCT__ 5014 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5015 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5016 { 5017 PetscErrorCode ierr; 5018 5019 PetscFunctionBegin; 5020 if (scall == MAT_INITIAL_MATRIX) { 5021 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5022 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5023 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5024 } 5025 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5026 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5027 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5028 PetscFunctionReturn(0); 5029 } 5030 5031 /*MC 5032 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5033 5034 Options Database Keys: 5035 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5036 5037 Level: beginner 5038 5039 .seealso: MatCreateAIJ() 5040 M*/ 5041 5042 #undef __FUNCT__ 5043 #define __FUNCT__ "MatCreate_MPIAIJ" 5044 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5045 { 5046 Mat_MPIAIJ *b; 5047 PetscErrorCode ierr; 5048 PetscMPIInt size; 5049 5050 PetscFunctionBegin; 5051 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5052 5053 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5054 B->data = (void*)b; 5055 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5056 B->assembled = PETSC_FALSE; 5057 B->insertmode = NOT_SET_VALUES; 5058 b->size = size; 5059 5060 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5061 5062 /* build cache for off array entries formed */ 5063 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5064 5065 b->donotstash = PETSC_FALSE; 5066 b->colmap = 0; 5067 b->garray = 0; 5068 b->roworiented = PETSC_TRUE; 5069 5070 /* stuff used for matrix vector multiply */ 5071 b->lvec = NULL; 5072 b->Mvctx = NULL; 5073 5074 /* stuff for MatGetRow() */ 5075 b->rowindices = 0; 5076 b->rowvalues = 0; 5077 b->getrowactive = PETSC_FALSE; 5078 5079 /* flexible pointer used in CUSP/CUSPARSE classes */ 5080 b->spptr = NULL; 5081 5082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5093 #if defined(PETSC_HAVE_ELEMENTAL) 5094 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5095 #endif 5096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5099 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5100 PetscFunctionReturn(0); 5101 } 5102 5103 #undef __FUNCT__ 5104 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5105 /*@C 5106 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5107 and "off-diagonal" part of the matrix in CSR format. 5108 5109 Collective on MPI_Comm 5110 5111 Input Parameters: 5112 + comm - MPI communicator 5113 . m - number of local rows (Cannot be PETSC_DECIDE) 5114 . n - This value should be the same as the local size used in creating the 5115 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5116 calculated if N is given) For square matrices n is almost always m. 5117 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5118 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5119 . i - row indices for "diagonal" portion of matrix 5120 . j - column indices 5121 . a - matrix values 5122 . oi - row indices for "off-diagonal" portion of matrix 5123 . oj - column indices 5124 - oa - matrix values 5125 5126 Output Parameter: 5127 . mat - the matrix 5128 5129 Level: advanced 5130 5131 Notes: 5132 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5133 must free the arrays once the matrix has been destroyed and not before. 5134 5135 The i and j indices are 0 based 5136 5137 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5138 5139 This sets local rows and cannot be used to set off-processor values. 5140 5141 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5142 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5143 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5144 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5145 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5146 communication if it is known that only local entries will be set. 
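
   Example usage (a minimal illustrative sketch, not a tested excerpt; the CSR arrays below are made up,
   and it assumes, following the way the two blocks are created by this routine, that j holds local column
   indices of the "diagonal" block while oj holds global column indices of the "off-diagonal" block).
   On rank 0 of a two-process run this would build the first row of the 2x2 global matrix [2 -1; -1 2],
   with each process owning one row and one column:

.vb
      PetscInt    i[]  = {0,1}, j[]  = {0};
      PetscScalar a[]  = {2.0};
      PetscInt    oi[] = {0,1}, oj[] = {1};
      PetscScalar oa[] = {-1.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve

   Rank 1 would pass j[] = {0}, a[] = {2.0}, oj[] = {0}, oa[] = {-1.0}; all six arrays must remain valid
   until A has been destroyed.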
5147 5148 .keywords: matrix, aij, compressed row, sparse, parallel 5149 5150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5151 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5152 @*/ 5153 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5154 { 5155 PetscErrorCode ierr; 5156 Mat_MPIAIJ *maij; 5157 5158 PetscFunctionBegin; 5159 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5160 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5161 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5162 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5163 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5164 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5165 maij = (Mat_MPIAIJ*) (*mat)->data; 5166 5167 (*mat)->preallocated = PETSC_TRUE; 5168 5169 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5170 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5171 5172 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5173 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5174 5175 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5176 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5177 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5178 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5179 5180 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5181 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5182 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5183 PetscFunctionReturn(0); 5184 } 5185 5186 /* 5187 Special version for direct calls from Fortran 5188 */ 5189 #include <petsc/private/fortranimpl.h> 5190 5191 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5192 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5193 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5194 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5195 #endif 5196 5197 /* Change these macros so can be used in void function */ 5198 #undef CHKERRQ 5199 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5200 #undef SETERRQ2 5201 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5202 #undef SETERRQ3 5203 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5204 #undef SETERRQ 5205 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5206 5207 #undef __FUNCT__ 5208 #define __FUNCT__ "matsetvaluesmpiaij_" 5209 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5210 { 5211 Mat mat = *mmat; 5212 PetscInt m = *mm, n = *mn; 5213 InsertMode addv = *maddv; 5214 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5215 PetscScalar value; 5216 PetscErrorCode ierr; 5217 5218 MatCheckPreallocated(mat,1); 5219 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5220 5221 #if defined(PETSC_USE_DEBUG) 5222 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5223 #endif 5224 { 5225 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5226 
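      /* [rstart,rend) and [cstart,cend) are the locally owned global row and column ranges:
         entries in a locally owned row are inserted directly into the diagonal block aij->A
         (owned columns) or the off-diagonal block aij->B (all other columns); entries in rows
         owned by other processes are placed in the stash for communication at assembly time,
         unless donotstash has been set */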
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5227 PetscBool roworiented = aij->roworiented; 5228 5229 /* Some Variables required in the macro */ 5230 Mat A = aij->A; 5231 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5232 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5233 MatScalar *aa = a->a; 5234 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5235 Mat B = aij->B; 5236 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5237 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5238 MatScalar *ba = b->a; 5239 5240 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5241 PetscInt nonew = a->nonew; 5242 MatScalar *ap1,*ap2; 5243 5244 PetscFunctionBegin; 5245 for (i=0; i<m; i++) { 5246 if (im[i] < 0) continue; 5247 #if defined(PETSC_USE_DEBUG) 5248 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5249 #endif 5250 if (im[i] >= rstart && im[i] < rend) { 5251 row = im[i] - rstart; 5252 lastcol1 = -1; 5253 rp1 = aj + ai[row]; 5254 ap1 = aa + ai[row]; 5255 rmax1 = aimax[row]; 5256 nrow1 = ailen[row]; 5257 low1 = 0; 5258 high1 = nrow1; 5259 lastcol2 = -1; 5260 rp2 = bj + bi[row]; 5261 ap2 = ba + bi[row]; 5262 rmax2 = bimax[row]; 5263 nrow2 = bilen[row]; 5264 low2 = 0; 5265 high2 = nrow2; 5266 5267 for (j=0; j<n; j++) { 5268 if (roworiented) value = v[i*n+j]; 5269 else value = v[i+j*m]; 5270 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5271 if (in[j] >= cstart && in[j] < cend) { 5272 col = in[j] - cstart; 5273 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5274 } else if (in[j] < 0) continue; 5275 #if defined(PETSC_USE_DEBUG) 5276 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5277 #endif 5278 else { 5279 if (mat->was_assembled) { 5280 if (!aij->colmap) { 5281 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5282 } 5283 #if defined(PETSC_USE_CTABLE) 5284 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5285 col--; 5286 #else 5287 col = aij->colmap[in[j]] - 1; 5288 #endif 5289 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5290 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5291 col = in[j]; 5292 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5293 B = aij->B; 5294 b = (Mat_SeqAIJ*)B->data; 5295 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5296 rp2 = bj + bi[row]; 5297 ap2 = ba + bi[row]; 5298 rmax2 = bimax[row]; 5299 nrow2 = bilen[row]; 5300 low2 = 0; 5301 high2 = nrow2; 5302 bm = aij->B->rmap->n; 5303 ba = b->a; 5304 } 5305 } else col = in[j]; 5306 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5307 } 5308 } 5309 } else if (!aij->donotstash) { 5310 if (roworiented) { 5311 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5312 } else { 5313 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5314 } 5315 } 5316 } 5317 } 5318 PetscFunctionReturnVoid(); 5319 } 5320 5321
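
/*
   Illustrative sketch (not from the PETSc sources): the MatSetValues()-based assembly that the
   notes for MatCreateMPIAIJWithSplitArrays() above recommend instead of the split-array path.
   It assembles a distributed tridiagonal MPIAIJ matrix with 4 rows per process and assumes it
   runs inside an ordinary PETSc program (after PetscInitialize()); the preallocation counts of
   3 diagonal-block and 1 off-diagonal-block nonzeros per row are specific to this stencil.

      Mat            A;
      PetscErrorCode ierr;
      PetscInt       m = 4,M,row,rstart,rend,ncols,cols[3];
      PetscScalar    vals[3];

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,NULL,1,NULL);CHKERRQ(ierr);
      ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ncols = 0;
        if (row > 0)   {cols[ncols] = row-1; vals[ncols] = -1.0; ncols++;}
        cols[ncols] = row; vals[ncols] = 2.0; ncols++;
        if (row < M-1) {cols[ncols] = row+1; vals[ncols] = -1.0; ncols++;}
        ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/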