1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart 
+ a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 217 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += 
rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 316 ierr = 
PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = 
PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 
407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) { \ 443 ap1[_i] += value; \ 444 /* Not sure LogFlops will slow dow the code or not */ \ 445 (void)PetscLogFlops(1.0); \ 446 } \ 447 else ap1[_i] = value; \ 448 goto a_noinsert; \ 449 } \ 450 } \ 451 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 452 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 453 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 454 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 455 N = nrow1++ - 1; a->nz++; high1++; \ 456 /* shift up all the later entries in this row */ \ 457 ierr = 
PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 458 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 459 rp1[_i] = col; \ 460 ap1[_i] = value; \ 461 A->nonzerostate++;\ 462 a_noinsert: ; \ 463 ailen[row] = nrow1; \ 464 } 465 466 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 467 { \ 468 if (col <= lastcol2) low2 = 0; \ 469 else high2 = nrow2; \ 470 lastcol2 = col; \ 471 while (high2-low2 > 5) { \ 472 t = (low2+high2)/2; \ 473 if (rp2[t] > col) high2 = t; \ 474 else low2 = t; \ 475 } \ 476 for (_i=low2; _i<high2; _i++) { \ 477 if (rp2[_i] > col) break; \ 478 if (rp2[_i] == col) { \ 479 if (addv == ADD_VALUES) { \ 480 ap2[_i] += value; \ 481 (void)PetscLogFlops(1.0); \ 482 } \ 483 else ap2[_i] = value; \ 484 goto b_noinsert; \ 485 } \ 486 } \ 487 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 488 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 489 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 490 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 491 N = nrow2++ - 1; b->nz++; high2++; \ 492 /* shift up all the later entries in this row */ \ 493 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 494 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 495 rp2[_i] = col; \ 496 ap2[_i] = value; \ 497 B->nonzerostate++; \ 498 b_noinsert: ; \ 499 bilen[row] = nrow2; \ 500 } 501 502 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 503 { 504 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 506 PetscErrorCode ierr; 507 PetscInt l,*garray = mat->garray,diag; 508 509 PetscFunctionBegin; 510 /* code only works for square matrices A */ 511 512 /* find size of row to the left of the diagonal part */ 513 ierr = 
MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 514 row = row - diag; 515 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 516 if (garray[b->j[b->i[row]+l]] > diag) break; 517 } 518 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 519 520 /* diagonal part */ 521 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 522 523 /* right of diagonal part */ 524 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 525 PetscFunctionReturn(0); 526 } 527 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value = 0.0; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 
564 nrow1 = ailen[row]; 565 low1 = 0; 566 high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 577 if (in[j] >= cstart && in[j] < cend) { 578 col = in[j] - cstart; 579 nonew = a->nonew; 580 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 581 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 582 } else if (in[j] < 0) continue; 583 #if defined(PETSC_USE_DEBUG) 584 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 585 #endif 586 else { 587 if (mat->was_assembled) { 588 if (!aij->colmap) { 589 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 590 } 591 #if defined(PETSC_USE_CTABLE) 592 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 593 col--; 594 #else 595 col = aij->colmap[in[j]] - 1; 596 #endif 597 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 598 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 599 col = in[j]; 600 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 601 B = aij->B; 602 b = (Mat_SeqAIJ*)B->data; 603 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 604 rp2 = bj + bi[row]; 605 ap2 = ba + bi[row]; 606 rmax2 = bimax[row]; 607 nrow2 = bilen[row]; 608 low2 = 0; 609 high2 = nrow2; 610 bm = aij->B->rmap->n; 611 ba = b->a; 612 } else if (col < 0) { 613 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 614 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 615 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 616 } 617 } else col = in[j]; 618 nonew 
= b->nonew; 619 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 620 } 621 } 622 } else { 623 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 624 if (!aij->donotstash) { 625 mat->assembled = PETSC_FALSE; 626 if (roworiented) { 627 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } else { 629 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 630 } 631 } 632 } 633 } 634 PetscFunctionReturn(0); 635 } 636 637 /* 638 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 639 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 640 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
641 */ 642 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 643 { 644 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 645 Mat A = aij->A; /* diagonal part of the matrix */ 646 Mat B = aij->B; /* offdiagonal part of the matrix */ 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 649 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 650 PetscInt *ailen = a->ilen,*aj = a->j; 651 PetscInt *bilen = b->ilen,*bj = b->j; 652 PetscInt am = aij->A->rmap->n,j; 653 PetscInt diag_so_far = 0,dnz; 654 PetscInt offd_so_far = 0,onz; 655 656 PetscFunctionBegin; 657 /* Iterate over all rows of the matrix */ 658 for (j=0; j<am; j++) { 659 dnz = onz = 0; 660 /* Iterate over all non-zero columns of the current row */ 661 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 662 /* If column is in the diagonal */ 663 if (mat_j[col] >= cstart && mat_j[col] < cend) { 664 aj[diag_so_far++] = mat_j[col] - cstart; 665 dnz++; 666 } else { /* off-diagonal entries */ 667 bj[offd_so_far++] = mat_j[col]; 668 onz++; 669 } 670 } 671 ailen[j] = dnz; 672 bilen[j] = onz; 673 } 674 PetscFunctionReturn(0); 675 } 676 677 /* 678 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 679 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 680 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 681 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 682 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
683 */ 684 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 685 { 686 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 687 Mat A = aij->A; /* diagonal part of the matrix */ 688 Mat B = aij->B; /* offdiagonal part of the matrix */ 689 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 690 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 691 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 692 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 693 PetscInt *ailen = a->ilen,*aj = a->j; 694 PetscInt *bilen = b->ilen,*bj = b->j; 695 PetscInt am = aij->A->rmap->n,j; 696 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 697 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 698 PetscScalar *aa = a->a,*ba = b->a; 699 700 PetscFunctionBegin; 701 /* Iterate over all rows of the matrix */ 702 for (j=0; j<am; j++) { 703 dnz_row = onz_row = 0; 704 rowstart_offd = full_offd_i[j]; 705 rowstart_diag = full_diag_i[j]; 706 /* Iterate over all non-zero columns of the current row */ 707 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 708 /* If column is in the diagonal */ 709 if (mat_j[col] >= cstart && mat_j[col] < cend) { 710 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 711 aa[rowstart_diag+dnz_row] = mat_a[col]; 712 dnz_row++; 713 } else { /* off-diagonal entries */ 714 bj[rowstart_offd+onz_row] = mat_j[col]; 715 ba[rowstart_offd+onz_row] = mat_a[col]; 716 onz_row++; 717 } 718 } 719 ailen[j] = dnz_row; 720 bilen[j] = onz_row; 721 } 722 PetscFunctionReturn(0); 723 } 724 725 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 728 PetscErrorCode ierr; 729 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 731 732 PetscFunctionBegin; 733 for (i=0; i<m; i++) { 734 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 735 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 736 if (idxm[i] >= rstart && idxm[i] < rend) { 737 row = idxm[i] - rstart; 738 for (j=0; j<n; j++) { 739 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 740 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 744 } else { 745 if (!aij->colmap) { 746 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 747 } 748 #if defined(PETSC_USE_CTABLE) 749 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 750 col--; 751 #else 752 col = aij->colmap[idxn[j]] - 1; 753 #endif 754 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 755 else { 756 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 757 } 758 } 759 } 760 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 761 } 762 PetscFunctionReturn(0); 763 } 764 765 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 766 767 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 768 { 769 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 770 PetscErrorCode ierr; 771 PetscInt nstash,reallocs; 772 773 PetscFunctionBegin; 774 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 775 776 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 777 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 778 ierr = PetscInfo2(aij->A,"Stash has %D entries, 
uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 779 PetscFunctionReturn(0); 780 } 781 782 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 783 { 784 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 785 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 786 PetscErrorCode ierr; 787 PetscMPIInt n; 788 PetscInt i,j,rstart,ncols,flg; 789 PetscInt *row,*col; 790 PetscBool other_disassembled; 791 PetscScalar *val; 792 793 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 794 795 PetscFunctionBegin; 796 if (!aij->donotstash && !mat->nooffprocentries) { 797 while (1) { 798 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 799 if (!flg) break; 800 801 for (i=0; i<n; ) { 802 /* Now identify the consecutive vals belonging to the same row */ 803 for (j=i,rstart=row[j]; j<n; j++) { 804 if (row[j] != rstart) break; 805 } 806 if (j < n) ncols = j-i; 807 else ncols = n-i; 808 /* Now assemble all these values with a single function call */ 809 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 810 811 i = j; 812 } 813 } 814 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 815 } 816 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 817 if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU) aij->A->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 818 #endif 819 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 820 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 821 822 /* determine if any processor has disassembled, if so we must 823 also disassemble ourself, in order that we may reassemble. 
*/ 824 /* 825 if nonzero structure of submatrix B cannot change then we know that 826 no processor disassembled thus we can skip this stuff 827 */ 828 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 829 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 830 if (mat->was_assembled && !other_disassembled) { 831 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 832 aij->B->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 833 #endif 834 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 835 } 836 } 837 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 838 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 839 } 840 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 841 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 842 if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU && aij->B->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) aij->B->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 843 #endif 844 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 845 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 846 847 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 848 849 aij->rowvalues = 0; 850 851 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 852 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 853 854 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 855 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 856 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 857 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 858 } 859 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 860 mat->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 861 #endif 862 PetscFunctionReturn(0); 
}

/*
   MatZeroEntries_MPIAIJ - Zeros all entries of the parallel AIJ matrix by
   zeroing both the diagonal (A) and off-diagonal (B) sequential blocks.
   The nonzero pattern is retained.
*/
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatZeroRows_MPIAIJ - Zeros the given (global) rows of the matrix, optionally
   placing diag on the diagonal of each zeroed row and fixing the right-hand side
   b = diag*x for those rows.

   Collective on Mat: every rank must call; contains MPIU_Allreduce and a final
   MatAssemblyBegin/End.

   Input Parameters:
+  A    - the MPIAIJ matrix
.  N    - number of rows to zero (global indices, may include rows owned elsewhere)
.  rows - the global row indices
.  diag - value to place on the diagonal of the zeroed rows (0.0 means leave zero)
.  x    - optional solution vector used to fix b (may be NULL)
-  b    - optional right-hand side to fix (may be NULL)
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows: maps the (possibly off-process) global indices to local row numbers */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed: b[row] = diag*x[row] on the zeroed rows */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* record per-block nonzero states so we can detect pattern changes afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the A (diagonal) block */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the nonew flags; they are temporarily cleared to permit inserting new diagonal entries */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* skip rows whose diagonal position falls outside the column space (rectangular case) */
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the original insertion-policy flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0.0: simply zero the rows in both blocks */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate: bump the global state if ANY rank changed its local pattern */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/*
   MatZeroRowsColumns_MPIAIJ - Zeros the given rows AND the corresponding columns
   of the matrix, optionally placing diag on the diagonal and adjusting the
   right-hand side b -= sum of (zeroed column entries)*x.

   Collective on Mat: uses a PetscSF reduction, VecScatters and an MPIU_Allreduce.

   The diagonal block is handled by MatZeroRowsColumns() on l->A; the off-diagonal
   block is handled by scattering a 0/1 mask of zeroed rows into the ghost layout
   and explicitly clearing the matching column entries of B.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix: build a 0/1 mask of zeroed rows and
     scatter it to the ghost (lvec) layout so each rank can see which of its
     off-process columns were zeroed */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x over so b can be corrected for zeroed columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed on its owner: fold the entry into b and clear it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatMult_MPIAIJ - y = A*x. Overlaps the ghost-value scatter with the local
   (diagonal block) product, then adds the off-diagonal block contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  /* start communicating ghost values, compute yy = A_diag*xx meanwhile */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* yy += B*lvec (off-diagonal contribution from ghost values) */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultDiagonalBlock_MPIAIJ - Applies only the local diagonal block: xx = A_diag*bb.
*/
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultAdd_MPIAIJ - z = y + A*x, with the same overlap of communication and
   local computation as MatMult_MPIAIJ.
*/
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  /* optionally use the plain MPI1 scatter variant when requested */
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultTranspose_MPIAIJ - y = A^T * x. Computes B^T*x into the ghost work
   vector and A_diag^T*x locally, then accumulates the partial results with a
   reverse scatter.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   within tol. Collective: performs an MPIU_Allreduce of the per-rank diagonal
   test and, on more than one rank, extracts and compares the off-diagonal
   pieces via MatCreateSubMatrices.

   Output Parameter:
.  f - PETSC_TRUE if Bmat == Amat^T (within tol)
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  /* all ranks must agree the diagonal blocks pass before doing the expensive test */
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global rows NOT owned by this rank: [0,first) followed by [last,M) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* compare A(Me,Notme) against B(Notme,Me) */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatIsSymmetric_MPIAIJ - A matrix is symmetric iff it equals its own transpose,
   so this simply delegates to MatIsTranspose_MPIAIJ(A,A,...).
*/
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultTransposeAdd_MPIAIJ - z = y + A^T * x, using the same
   local-product-plus-reverse-scatter scheme as MatMultTranspose_MPIAIJ.
*/
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is
   the diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  /* with matching layouts the global diagonal lies entirely in the local diagonal block */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatScale_MPIAIJ - A = aa*A; scales both sequential blocks.
*/
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatDestroy_MPIAIJ - Releases all storage owned by the MPIAIJ implementation
   (blocks, column map, ghost vector, scatters, work arrays) and clears the
   composed function slots so the Mat can be retyped.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the composed query functions so stale pointers cannot be invoked */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the
   PETSc binary matrix format (header, row lengths, column indices, values).

   Rank 0 does all file I/O; the other ranks stream their row lengths, column
   indices and values to rank 0 under PetscViewerFlowControl pacing. Column
   indices of each row are emitted in global order by merging the lower
   off-diagonal part of B, then A (shifted by cstart), then the upper part of B.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    /* header[3] is the global nonzero count, gathered from all ranks */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs a buffer as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* B entries with global column below the diagonal block come first */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* then the diagonal block (shift local columns by cstart to global) */
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    /* then the remaining (upper) B entries */
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values, in the same merged order as the indices */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for ASCII, draw, socket
   and binary viewers. Info-style ASCII formats are handled in place; otherwise
   the whole matrix is gathered onto process 0 and viewed there.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - 
Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 asks for all rows/cols, everyone else for none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatView_MPIAIJ - Top-level viewer entry point; forwards supported viewer
   types (ASCII, draw, binary, socket) to the shared implementation above.
*/
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPIAIJ - (Local) SOR/Gauss-Seidel relaxation for the parallel matrix.
   Only the *local* sweep variants are supported: each outer iteration scatters
   the current ghost values of xx, forms bb1 = bb - B*lvec (the right-hand side
   with off-process coupling moved over), and applies the sequential SOR of the
   diagonal block to bb1. SOR_EISENSTAT is implemented via the split
   backward/forward application; truly parallel (global ordering) SOR is not
   supported and raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever more than one outer iteration runs or the initial
     guess is nonzero (so the off-process coupling must be subtracted) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update since xx starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily build and cache the diagonal needed by the Eisenstat trick */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr =
VecDestroy(&bb1);CHKERRQ(ierr); 1641 1642 matin->factorerrortype = mat->A->factorerrortype; 1643 PetscFunctionReturn(0); 1644 } 1645 1646 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1647 { 1648 Mat aA,aB,Aperm; 1649 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1650 PetscScalar *aa,*ba; 1651 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1652 PetscSF rowsf,sf; 1653 IS parcolp = NULL; 1654 PetscBool done; 1655 PetscErrorCode ierr; 1656 1657 PetscFunctionBegin; 1658 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1659 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1660 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1661 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1662 1663 /* Invert row permutation to find out where my rows should go */ 1664 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1665 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1666 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1667 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1668 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1669 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1670 1671 /* Invert column permutation to find out where my columns should go */ 1672 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1673 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1674 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1675 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1676 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1677 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1678 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1679 1680 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1681 ierr = 
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1682 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1683 1684 /* Find out where my gcols should go */ 1685 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1686 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1687 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1688 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1689 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1690 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1691 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1692 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1693 1694 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1695 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1696 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1697 for (i=0; i<m; i++) { 1698 PetscInt row = rdest[i],rowner; 1699 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1700 for (j=ai[i]; j<ai[i+1]; j++) { 1701 PetscInt cowner,col = cdest[aj[j]]; 1702 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1703 if (rowner == cowner) dnnz[i]++; 1704 else onnz[i]++; 1705 } 1706 for (j=bi[i]; j<bi[i+1]; j++) { 1707 PetscInt cowner,col = gcdest[bj[j]]; 1708 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1709 if (rowner == cowner) dnnz[i]++; 1710 else onnz[i]++; 1711 } 1712 } 1713 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1714 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1715 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1716 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1717 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1718 1719 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1720 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1721 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1722 for (i=0; i<m; i++) { 1723 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1724 PetscInt j0,rowlen; 1725 rowlen = ai[i+1] - ai[i]; 1726 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1727 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1728 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1729 } 1730 rowlen = bi[i+1] - bi[i]; 1731 for (j0=j=0; j<rowlen; j0=j) { 1732 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1733 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1734 } 1735 } 1736 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1737 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1738 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1739 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1740 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1741 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1742 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1743 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1744 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1745 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1746 *B = Aperm; 1747 PetscFunctionReturn(0); 1748 } 1749 1750 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1751 { 1752 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1753 PetscErrorCode ierr; 1754 1755 PetscFunctionBegin; 1756 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1757 if (ghosts) *ghosts = aij->garray; 1758 PetscFunctionReturn(0); 1759 } 1760 1761 
/* Gather matrix statistics (nonzero counts, memory, mallocs) for an MPIAIJ
   matrix by summing the diagonal (A) and off-diagonal (B) blocks, then
   reducing across ranks as requested by flag. */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate the off-diagonal block's counts on top of the diagonal block's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Dispatch a MatOption either to both sequential blocks (A and B) or record
   it in the MPIAIJ context, depending on the option. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case
MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* these options are forwarded verbatim to both sequential blocks */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Return one locally owned row in global column numbering, merging the
   diagonal and off-diagonal blocks (both already sorted) into cached work
   arrays. Must be paired with MatRestoreRow_MPIAIJ(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request the column/value arrays that the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first ... */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all of A, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr =
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release a row obtained with MatGetRow_MPIAIJ(); only clears the "active"
   flag since the merged work arrays are cached in the matrix. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute a matrix norm by combining the diagonal (A) and off-diagonal (B)
   blocks; a global reduction merges the per-rank partial results. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp accumulates |column| sums over ALL global columns on every rank */
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal columns are stored compressed: map back to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr =
MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/* Transpose a parallel AIJ matrix. The local diagonal block is transposed
   in place (fast, all writes local); the off-diagonal block is redistributed
   through MatSetValues()/assembly. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* global row 'row' of A becomes one column of B: ncol destination rows, one column */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents by B's (MAT_INPLACE path) */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Scale rows by ll and columns by rr: mat = diag(ll)*mat*diag(rr); either
   vector may be NULL to skip that side. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter of rr is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Clear the factored state of the local diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Test A == B by comparing the diagonal and off-diagonal blocks locally and
   taking a logical AND over all ranks, so every rank gets the same answer. */
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    /* only compare the off-diagonal blocks if the diagonal blocks matched */
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    /* fast path: identical nonzero pattern and same implementation, copy blockwise */
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Default MatSetUp(): preallocate with default (heuristic) row lengths. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: for each row, count the
     union of the (sorted) global column sets of X and Y via a merge walk */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx = xi[i+1] - xi[i];
    nzy = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                       /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++;     /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;                 /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;                                      /* entries only in Y */
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Y = a*X + Y. SAME_NONZERO_PATTERN uses direct BLAS axpy on the value
   arrays; SUBSET falls back to the basic implementation; otherwise a new
   matrix with the union sparsity pattern is preallocated and filled. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr =
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
    /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
       will be updated */
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
      Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
    }
#endif
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    /* build a matrix with the union pattern, add into it, then replace Y's
       innards with it */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Complex-conjugate every entry (compiled to a no-op for real scalars). */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr =
MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

/* Replace all entries by their real part (applied to both blocks). */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replace all entries by their imaginary part (applied to both blocks). */
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Row-wise maximum absolute value (and optionally the global column index of
   the maximum), obtained by comparing the per-row result of the diagonal
   block with that of the off-diagonal block. */
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift diagonal-block column indices to global numbering */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      /* off-diagonal winner: map compressed column index back to global */
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Row-wise minimum absolute value; analogous to MatGetRowMaxAbs_MPIAIJ. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
PetscErrorCode ierr; 2363 PetscInt i,*idxb = 0; 2364 PetscScalar *va,*vb; 2365 Vec vtmp; 2366 2367 PetscFunctionBegin; 2368 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2369 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2370 if (idx) { 2371 for (i=0; i<A->cmap->n; i++) { 2372 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2373 } 2374 } 2375 2376 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2377 if (idx) { 2378 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2379 } 2380 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2381 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2382 2383 for (i=0; i<A->rmap->n; i++) { 2384 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2385 va[i] = vb[i]; 2386 if (idx) idx[i] = a->garray[idxb[i]]; 2387 } 2388 } 2389 2390 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2391 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2392 ierr = PetscFree(idxb);CHKERRQ(ierr); 2393 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2398 { 2399 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2400 PetscInt n = A->rmap->n; 2401 PetscInt cstart = A->cmap->rstart; 2402 PetscInt *cmap = mat->garray; 2403 PetscInt *diagIdx, *offdiagIdx; 2404 Vec diagV, offdiagV; 2405 PetscScalar *a, *diagA, *offdiagA; 2406 PetscInt r; 2407 PetscErrorCode ierr; 2408 2409 PetscFunctionBegin; 2410 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2411 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2412 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2413 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2414 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2415 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2416 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2417 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2418 for (r = 0; r < n; ++r) { 2419 if 
(PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      /* off-diagonal winner: map compressed column index back to global */
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Row-wise maximum (signed) entry and its global column index, obtained by
   comparing per-row results of the diagonal and off-diagonal blocks. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr =
PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2476 { 2477 PetscErrorCode ierr; 2478 Mat *dummy; 2479 2480 PetscFunctionBegin; 2481 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2482 *newmat = *dummy; 2483 ierr = PetscFree(dummy);CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2488 { 2489 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2490 PetscErrorCode ierr; 2491 2492 PetscFunctionBegin; 2493 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2494 A->factorerrortype = a->A->factorerrortype; 2495 PetscFunctionReturn(0); 2496 } 2497 2498 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2499 { 2500 PetscErrorCode ierr; 2501 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2502 2503 PetscFunctionBegin; 2504 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2505 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2506 if (x->assembled) { 2507 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2508 } else { 2509 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2510 } 2511 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2512 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2517 { 2518 PetscFunctionBegin; 2519 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2520 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2521 PetscFunctionReturn(0); 2522 } 2523 2524 /*@ 2525 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a 
scalable algorithm to compute the overlap 2526 2527 Collective on Mat 2528 2529 Input Parameters: 2530 + A - the matrix 2531 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2532 2533 Level: advanced 2534 2535 @*/ 2536 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2537 { 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2546 { 2547 PetscErrorCode ierr; 2548 PetscBool sc = PETSC_FALSE,flg; 2549 2550 PetscFunctionBegin; 2551 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2552 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2553 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2554 if (flg) { 2555 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2556 } 2557 ierr = PetscOptionsTail();CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2562 { 2563 PetscErrorCode ierr; 2564 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2565 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2566 2567 PetscFunctionBegin; 2568 if (!Y->preallocated) { 2569 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2570 } else if (!aij->nz) { 2571 PetscInt nonew = aij->nonew; 2572 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2573 aij->nonew = nonew; 2574 } 2575 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2580 { 2581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2582 PetscErrorCode ierr; 2583 2584 
PetscFunctionBegin; 2585 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2586 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2587 if (d) { 2588 PetscInt rstart; 2589 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2590 *d += rstart; 2591 2592 } 2593 PetscFunctionReturn(0); 2594 } 2595 2596 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2597 { 2598 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2599 PetscErrorCode ierr; 2600 2601 PetscFunctionBegin; 2602 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 /* -------------------------------------------------------------------*/ 2607 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2608 MatGetRow_MPIAIJ, 2609 MatRestoreRow_MPIAIJ, 2610 MatMult_MPIAIJ, 2611 /* 4*/ MatMultAdd_MPIAIJ, 2612 MatMultTranspose_MPIAIJ, 2613 MatMultTransposeAdd_MPIAIJ, 2614 0, 2615 0, 2616 0, 2617 /*10*/ 0, 2618 0, 2619 0, 2620 MatSOR_MPIAIJ, 2621 MatTranspose_MPIAIJ, 2622 /*15*/ MatGetInfo_MPIAIJ, 2623 MatEqual_MPIAIJ, 2624 MatGetDiagonal_MPIAIJ, 2625 MatDiagonalScale_MPIAIJ, 2626 MatNorm_MPIAIJ, 2627 /*20*/ MatAssemblyBegin_MPIAIJ, 2628 MatAssemblyEnd_MPIAIJ, 2629 MatSetOption_MPIAIJ, 2630 MatZeroEntries_MPIAIJ, 2631 /*24*/ MatZeroRows_MPIAIJ, 2632 0, 2633 0, 2634 0, 2635 0, 2636 /*29*/ MatSetUp_MPIAIJ, 2637 0, 2638 0, 2639 MatGetDiagonalBlock_MPIAIJ, 2640 0, 2641 /*34*/ MatDuplicate_MPIAIJ, 2642 0, 2643 0, 2644 0, 2645 0, 2646 /*39*/ MatAXPY_MPIAIJ, 2647 MatCreateSubMatrices_MPIAIJ, 2648 MatIncreaseOverlap_MPIAIJ, 2649 MatGetValues_MPIAIJ, 2650 MatCopy_MPIAIJ, 2651 /*44*/ MatGetRowMax_MPIAIJ, 2652 MatScale_MPIAIJ, 2653 MatShift_MPIAIJ, 2654 MatDiagonalSet_MPIAIJ, 2655 MatZeroRowsColumns_MPIAIJ, 2656 /*49*/ MatSetRandom_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 0, 2661 /*54*/ MatFDColoringCreate_MPIXAIJ, 2662 0, 2663 
MatSetUnfactored_MPIAIJ, 2664 MatPermute_MPIAIJ, 2665 0, 2666 /*59*/ MatCreateSubMatrix_MPIAIJ, 2667 MatDestroy_MPIAIJ, 2668 MatView_MPIAIJ, 2669 0, 2670 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2671 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2672 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2677 MatGetRowMinAbs_MPIAIJ, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /*75*/ MatFDColoringApply_AIJ, 2683 MatSetFromOptions_MPIAIJ, 2684 0, 2685 0, 2686 MatFindZeroDiagonals_MPIAIJ, 2687 /*80*/ 0, 2688 0, 2689 0, 2690 /*83*/ MatLoad_MPIAIJ, 2691 MatIsSymmetric_MPIAIJ, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2697 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2698 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2699 MatPtAP_MPIAIJ_MPIAIJ, 2700 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2701 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2702 0, 2703 0, 2704 0, 2705 0, 2706 /*99*/ 0, 2707 0, 2708 0, 2709 MatConjugate_MPIAIJ, 2710 0, 2711 /*104*/MatSetValuesRow_MPIAIJ, 2712 MatRealPart_MPIAIJ, 2713 MatImaginaryPart_MPIAIJ, 2714 0, 2715 0, 2716 /*109*/0, 2717 0, 2718 MatGetRowMin_MPIAIJ, 2719 0, 2720 MatMissingDiagonal_MPIAIJ, 2721 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2722 0, 2723 MatGetGhosts_MPIAIJ, 2724 0, 2725 0, 2726 /*119*/0, 2727 0, 2728 0, 2729 0, 2730 MatGetMultiProcBlock_MPIAIJ, 2731 /*124*/MatFindNonzeroRows_MPIAIJ, 2732 MatGetColumnNorms_MPIAIJ, 2733 MatInvertBlockDiagonal_MPIAIJ, 2734 MatInvertVariableBlockDiagonal_MPIAIJ, 2735 MatCreateSubMatricesMPI_MPIAIJ, 2736 /*129*/0, 2737 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2738 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2739 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2740 0, 2741 /*134*/0, 2742 0, 2743 MatRARt_MPIAIJ_MPIAIJ, 2744 0, 2745 0, 2746 /*139*/MatSetBlockSizes_MPIAIJ, 2747 0, 2748 0, 2749 MatFDColoringSetUp_MPIXAIJ, 2750 MatFindOffBlockDiagonalEntries_MPIAIJ, 2751 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2752 }; 2753 2754 /* 
----------------------------------------------------------------------------------------*/ 2755 2756 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2757 { 2758 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2759 PetscErrorCode ierr; 2760 2761 PetscFunctionBegin; 2762 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2763 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2768 { 2769 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2770 PetscErrorCode ierr; 2771 2772 PetscFunctionBegin; 2773 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2774 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2775 PetscFunctionReturn(0); 2776 } 2777 2778 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2779 { 2780 Mat_MPIAIJ *b; 2781 PetscErrorCode ierr; 2782 PetscMPIInt size; 2783 2784 PetscFunctionBegin; 2785 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2786 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2787 b = (Mat_MPIAIJ*)B->data; 2788 2789 #if defined(PETSC_USE_CTABLE) 2790 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2791 #else 2792 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2793 #endif 2794 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2795 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2796 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2797 2798 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2799 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2800 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2801 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2802 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2803 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2804 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2805 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2806 2807 if (!B->preallocated) { 2808 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2809 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2810 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2811 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2812 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2813 } 2814 2815 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2816 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2817 B->preallocated = PETSC_TRUE; 2818 B->was_assembled = PETSC_FALSE; 2819 B->assembled = PETSC_FALSE; 2820 PetscFunctionReturn(0); 2821 } 2822 2823 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2824 { 2825 Mat_MPIAIJ *b; 2826 PetscErrorCode ierr; 2827 2828 PetscFunctionBegin; 2829 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2830 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2831 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2832 b = (Mat_MPIAIJ*)B->data; 2833 2834 #if defined(PETSC_USE_CTABLE) 2835 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2836 #else 2837 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2838 #endif 2839 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2840 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2841 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2842 2843 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2844 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2845 B->preallocated = PETSC_TRUE; 2846 B->was_assembled = PETSC_FALSE; 2847 B->assembled = PETSC_FALSE; 2848 PetscFunctionReturn(0); 2849 } 2850 2851 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2852 { 2853 Mat mat; 2854 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2855 PetscErrorCode 
ierr; 2856 2857 PetscFunctionBegin; 2858 *newmat = 0; 2859 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2860 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2861 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2862 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2863 a = (Mat_MPIAIJ*)mat->data; 2864 2865 mat->factortype = matin->factortype; 2866 mat->assembled = PETSC_TRUE; 2867 mat->insertmode = NOT_SET_VALUES; 2868 mat->preallocated = PETSC_TRUE; 2869 2870 a->size = oldmat->size; 2871 a->rank = oldmat->rank; 2872 a->donotstash = oldmat->donotstash; 2873 a->roworiented = oldmat->roworiented; 2874 a->rowindices = 0; 2875 a->rowvalues = 0; 2876 a->getrowactive = PETSC_FALSE; 2877 2878 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2879 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2880 2881 if (oldmat->colmap) { 2882 #if defined(PETSC_USE_CTABLE) 2883 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2884 #else 2885 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2886 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2887 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2888 #endif 2889 } else a->colmap = 0; 2890 if (oldmat->garray) { 2891 PetscInt len; 2892 len = oldmat->B->cmap->n; 2893 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2894 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2895 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2896 } else a->garray = 0; 2897 2898 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2899 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2900 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2901 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2902 2903 if (oldmat->Mvctx_mpi1) { 2904 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2905 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2906 } 2907 2908 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2909 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2910 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2911 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2912 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2913 *newmat = mat; 2914 PetscFunctionReturn(0); 2915 } 2916 2917 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2918 { 2919 PetscBool isbinary, ishdf5; 2920 PetscErrorCode ierr; 2921 2922 PetscFunctionBegin; 2923 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2924 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2925 /* force binary viewer to load .info file if it has not yet done so */ 2926 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2927 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2928 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2929 if (isbinary) { 2930 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2931 } else if (ishdf5) { 2932 #if defined(PETSC_HAVE_HDF5) 2933 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2934 #else 2935 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2936 #endif 2937 } else { 2938 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2939 } 2940 PetscFunctionReturn(0); 2941 } 2942 2943 
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2944 { 2945 PetscScalar *vals,*svals; 2946 MPI_Comm comm; 2947 PetscErrorCode ierr; 2948 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2949 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2950 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2951 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2952 PetscInt cend,cstart,n,*rowners; 2953 int fd; 2954 PetscInt bs = newMat->rmap->bs; 2955 2956 PetscFunctionBegin; 2957 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2958 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2959 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2960 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2961 if (!rank) { 2962 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2963 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2964 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2965 } 2966 2967 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2968 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2969 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2970 if (bs < 0) bs = 1; 2971 2972 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2973 M = header[1]; N = header[2]; 2974 2975 /* If global sizes are set, check if they are consistent with that given in the file */ 2976 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2977 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in 
file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2978 2979 /* determine ownership of all (block) rows */ 2980 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2981 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2982 else m = newMat->rmap->n; /* Set by user */ 2983 2984 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2985 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2986 2987 /* First process needs enough room for process with most rows */ 2988 if (!rank) { 2989 mmax = rowners[1]; 2990 for (i=2; i<=size; i++) { 2991 mmax = PetscMax(mmax, rowners[i]); 2992 } 2993 } else mmax = -1; /* unused, but compilers complain */ 2994 2995 rowners[0] = 0; 2996 for (i=2; i<=size; i++) { 2997 rowners[i] += rowners[i-1]; 2998 } 2999 rstart = rowners[rank]; 3000 rend = rowners[rank+1]; 3001 3002 /* distribute row lengths to all processors */ 3003 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3004 if (!rank) { 3005 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3006 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3007 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3008 for (j=0; j<m; j++) { 3009 procsnz[0] += ourlens[j]; 3010 } 3011 for (i=1; i<size; i++) { 3012 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3013 /* calculate the number of nonzeros on each processor */ 3014 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3015 procsnz[i] += rowlengths[j]; 3016 } 3017 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3018 } 3019 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3020 } else { 3021 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3022 } 3023 3024 if (!rank) { 3025 /* determine max buffer needed and allocate it */ 3026 maxnz = 0; 3027 for (i=0; i<size; i++) { 3028 maxnz = 
PetscMax(maxnz,procsnz[i]); 3029 } 3030 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3031 3032 /* read in my part of the matrix column indices */ 3033 nz = procsnz[0]; 3034 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3035 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3036 3037 /* read in every one elses and ship off */ 3038 for (i=1; i<size; i++) { 3039 nz = procsnz[i]; 3040 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3041 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3042 } 3043 ierr = PetscFree(cols);CHKERRQ(ierr); 3044 } else { 3045 /* determine buffer space needed for message */ 3046 nz = 0; 3047 for (i=0; i<m; i++) { 3048 nz += ourlens[i]; 3049 } 3050 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3051 3052 /* receive message of column indices*/ 3053 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3054 } 3055 3056 /* determine column ownership if matrix is not square */ 3057 if (N != M) { 3058 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3059 else n = newMat->cmap->n; 3060 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3061 cstart = cend - n; 3062 } else { 3063 cstart = rstart; 3064 cend = rend; 3065 n = cend - cstart; 3066 } 3067 3068 /* loop over local rows, determining number of off diagonal entries */ 3069 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3070 jj = 0; 3071 for (i=0; i<m; i++) { 3072 for (j=0; j<ourlens[i]; j++) { 3073 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3074 jj++; 3075 } 3076 } 3077 3078 for (i=0; i<m; i++) { 3079 ourlens[i] -= offlens[i]; 3080 } 3081 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3082 3083 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3084 3085 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3086 3087 for (i=0; i<m; i++) { 3088 ourlens[i] += offlens[i]; 3089 } 3090 3091 if (!rank) { 3092 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 
3093 3094 /* read in my part of the matrix numerical values */ 3095 nz = procsnz[0]; 3096 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3097 3098 /* insert into matrix */ 3099 jj = rstart; 3100 smycols = mycols; 3101 svals = vals; 3102 for (i=0; i<m; i++) { 3103 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3104 smycols += ourlens[i]; 3105 svals += ourlens[i]; 3106 jj++; 3107 } 3108 3109 /* read in other processors and ship out */ 3110 for (i=1; i<size; i++) { 3111 nz = procsnz[i]; 3112 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3113 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3114 } 3115 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3116 } else { 3117 /* receive numeric values */ 3118 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3119 3120 /* receive message of values*/ 3121 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3122 3123 /* insert into matrix */ 3124 jj = rstart; 3125 smycols = mycols; 3126 svals = vals; 3127 for (i=0; i<m; i++) { 3128 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3129 smycols += ourlens[i]; 3130 svals += ourlens[i]; 3131 jj++; 3132 } 3133 } 3134 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3135 ierr = PetscFree(vals);CHKERRQ(ierr); 3136 ierr = PetscFree(mycols);CHKERRQ(ierr); 3137 ierr = PetscFree(rowners);CHKERRQ(ierr); 3138 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3139 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 3143 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3144 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3145 { 3146 PetscErrorCode ierr; 3147 IS iscol_local; 3148 PetscBool isstride; 3149 PetscMPIInt lisstride=0,gisstride; 3150 3151 PetscFunctionBegin; 3152 /* check if we are grabbing all columns*/ 3153 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3154 3155 if (isstride) { 3156 PetscInt start,len,mstart,mlen; 3157 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3158 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3159 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3160 if (mstart == start && mlen-mstart == len) lisstride = 1; 3161 } 3162 3163 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3164 if (gisstride) { 3165 PetscInt N; 3166 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3167 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3168 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3169 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3170 } else { 3171 PetscInt cbs; 3172 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3173 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3174 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3175 } 3176 3177 *isseq = iscol_local; 3178 PetscFunctionReturn(0); 3179 } 3180 3181 /* 3182 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3183 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3184 3185 Input Parameters: 3186 mat - matrix 3187 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3188 i.e., mat->rstart <= isrow[i] < mat->rend 3189 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3190 i.e., mat->cstart <= iscol[i] < mat->cend 3191 Output Parameter: 3192 isrow_d,iscol_d - sequential 
row and column index sets for retrieving mat->A 3193 iscol_o - sequential column index set for retrieving mat->B 3194 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3195 */ 3196 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3197 { 3198 PetscErrorCode ierr; 3199 Vec x,cmap; 3200 const PetscInt *is_idx; 3201 PetscScalar *xarray,*cmaparray; 3202 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3203 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3204 Mat B=a->B; 3205 Vec lvec=a->lvec,lcmap; 3206 PetscInt i,cstart,cend,Bn=B->cmap->N; 3207 MPI_Comm comm; 3208 VecScatter Mvctx=a->Mvctx; 3209 3210 PetscFunctionBegin; 3211 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3212 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3213 3214 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3215 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3216 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3217 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3218 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3219 3220 /* Get start indices */ 3221 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3222 isstart -= ncols; 3223 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3224 3225 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3226 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3227 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3228 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3229 for (i=0; i<ncols; i++) { 3230 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3231 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3232 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3233 } 3234 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3235 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3236 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3237 3238 /* Get 
iscol_d */ 3239 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3240 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3241 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3242 3243 /* Get isrow_d */ 3244 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3245 rstart = mat->rmap->rstart; 3246 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3247 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3248 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3249 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3250 3251 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3252 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3253 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3254 3255 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3256 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3257 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3258 3259 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3260 3261 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3262 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3263 3264 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3265 /* off-process column indices */ 3266 count = 0; 3267 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3268 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3269 3270 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3271 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3272 for (i=0; i<Bn; i++) { 3273 if (PetscRealPart(xarray[i]) > -1.0) { 3274 idx[count] = i; /* local column index in off-diagonal part B */ 3275 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3276 count++; 3277 } 3278 } 3279 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3280 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3281 
3282 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3283 /* cannot ensure iscol_o has same blocksize as iscol! */ 3284 3285 ierr = PetscFree(idx);CHKERRQ(ierr); 3286 *garray = cmap1; 3287 3288 ierr = VecDestroy(&x);CHKERRQ(ierr); 3289 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3290 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3291 PetscFunctionReturn(0); 3292 } 3293 3294 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3295 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3296 { 3297 PetscErrorCode ierr; 3298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3299 Mat M = NULL; 3300 MPI_Comm comm; 3301 IS iscol_d,isrow_d,iscol_o; 3302 Mat Asub = NULL,Bsub = NULL; 3303 PetscInt n; 3304 3305 PetscFunctionBegin; 3306 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3307 3308 if (call == MAT_REUSE_MATRIX) { 3309 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3310 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3311 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3312 3313 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3314 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3315 3316 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3317 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3318 3319 /* Update diagonal and off-diagonal portions of submat */ 3320 asub = (Mat_MPIAIJ*)(*submat)->data; 3321 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3322 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 
3323 if (n) { 3324 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3325 } 3326 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 3329 } else { /* call == MAT_INITIAL_MATRIX) */ 3330 const PetscInt *garray; 3331 PetscInt BsubN; 3332 3333 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3334 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3335 3336 /* Create local submatrices Asub and Bsub */ 3337 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3338 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3339 3340 /* Create submatrix M */ 3341 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3342 3343 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3344 asub = (Mat_MPIAIJ*)M->data; 3345 3346 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3347 n = asub->B->cmap->N; 3348 if (BsubN > n) { 3349 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3350 const PetscInt *idx; 3351 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3352 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3353 3354 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3355 j = 0; 3356 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3357 for (i=0; i<n; i++) { 3358 if (j >= BsubN) break; 3359 while (subgarray[i] > garray[j]) j++; 3360 3361 if (subgarray[i] == garray[j]) { 3362 idx_new[i] = idx[j++]; 3363 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3364 } 3365 ierr = 
ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3366 3367 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3368 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3369 3370 } else if (BsubN < n) { 3371 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3372 } 3373 3374 ierr = PetscFree(garray);CHKERRQ(ierr); 3375 *submat = M; 3376 3377 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3378 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3379 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3380 3381 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3382 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3383 3384 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3385 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3386 } 3387 PetscFunctionReturn(0); 3388 } 3389 3390 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3391 { 3392 PetscErrorCode ierr; 3393 IS iscol_local=NULL,isrow_d; 3394 PetscInt csize; 3395 PetscInt n,i,j,start,end; 3396 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3397 MPI_Comm comm; 3398 3399 PetscFunctionBegin; 3400 /* If isrow has same processor distribution as mat, 3401 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3402 if (call == MAT_REUSE_MATRIX) { 3403 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3404 if (isrow_d) { 3405 sameRowDist = PETSC_TRUE; 3406 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3407 } else { 3408 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3409 if (iscol_local) { 3410 sameRowDist = PETSC_TRUE; 3411 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3412 } 3413 } 3414 } else { 3415 /* Check if 
isrow has same processor distribution as mat */ 3416 sameDist[0] = PETSC_FALSE; 3417 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3418 if (!n) { 3419 sameDist[0] = PETSC_TRUE; 3420 } else { 3421 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3422 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3423 if (i >= start && j < end) { 3424 sameDist[0] = PETSC_TRUE; 3425 } 3426 } 3427 3428 /* Check if iscol has same processor distribution as mat */ 3429 sameDist[1] = PETSC_FALSE; 3430 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3431 if (!n) { 3432 sameDist[1] = PETSC_TRUE; 3433 } else { 3434 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3435 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3436 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3437 } 3438 3439 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3440 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3441 sameRowDist = tsameDist[0]; 3442 } 3443 3444 if (sameRowDist) { 3445 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3446 /* isrow and iscol have same processor distribution as mat */ 3447 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3448 PetscFunctionReturn(0); 3449 } else { /* sameRowDist */ 3450 /* isrow has same processor distribution as mat */ 3451 if (call == MAT_INITIAL_MATRIX) { 3452 PetscBool sorted; 3453 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3454 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3455 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3456 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3457 3458 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3459 if (sorted) { 3460 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3461 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3462 PetscFunctionReturn(0); 3463 } 3464 } else { /* call == MAT_REUSE_MATRIX */ 3465 IS iscol_sub; 3466 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3467 if (iscol_sub) { 3468 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3469 PetscFunctionReturn(0); 3470 } 3471 } 3472 } 3473 } 3474 3475 /* General case: iscol -> iscol_local which has global size of iscol */ 3476 if (call == MAT_REUSE_MATRIX) { 3477 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3478 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3479 } else { 3480 if (!iscol_local) { 3481 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3482 } 3483 } 3484 3485 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3486 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3487 3488 if (call == MAT_INITIAL_MATRIX) { 3489 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3490 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3491 } 3492 PetscFunctionReturn(0); 3493 } 3494 3495 /*@C 3496 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3497 and "off-diagonal" part of the matrix in CSR format. 3498 3499 Collective 3500 3501 Input Parameters: 3502 + comm - MPI communicator 3503 . A - "diagonal" portion of matrix 3504 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3505 - garray - global index of B columns 3506 3507 Output Parameter: 3508 . 
mat - the matrix, with input A as its local diagonal matrix 3509 Level: advanced 3510 3511 Notes: 3512 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3513 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3514 3515 .seealso: MatCreateMPIAIJWithSplitArrays() 3516 @*/ 3517 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3518 { 3519 PetscErrorCode ierr; 3520 Mat_MPIAIJ *maij; 3521 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3522 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3523 PetscScalar *oa=b->a; 3524 Mat Bnew; 3525 PetscInt m,n,N; 3526 3527 PetscFunctionBegin; 3528 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3529 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3530 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3531 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3532 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3533 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3534 3535 /* Get global columns of mat */ 3536 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3537 3538 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3539 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3540 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3541 maij = (Mat_MPIAIJ*)(*mat)->data; 3542 3543 (*mat)->preallocated = PETSC_TRUE; 3544 3545 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3546 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3547 3548 /* Set A as diagonal portion of *mat */ 3549 maij->A = A; 3550 3551 nz = oi[m]; 3552 for (i=0; i<nz; i++) { 3553 col = oj[i]; 3554 oj[i] 
= garray[col]; 3555 } 3556 3557 /* Set Bnew as off-diagonal portion of *mat */ 3558 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3559 bnew = (Mat_SeqAIJ*)Bnew->data; 3560 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3561 maij->B = Bnew; 3562 3563 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3564 3565 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3566 b->free_a = PETSC_FALSE; 3567 b->free_ij = PETSC_FALSE; 3568 ierr = MatDestroy(&B);CHKERRQ(ierr); 3569 3570 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3571 bnew->free_a = PETSC_TRUE; 3572 bnew->free_ij = PETSC_TRUE; 3573 3574 /* condense columns of maij->B */ 3575 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3576 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3577 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3578 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3579 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3580 PetscFunctionReturn(0); 3581 } 3582 3583 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3584 3585 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3586 { 3587 PetscErrorCode ierr; 3588 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3589 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3590 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3591 Mat M,Msub,B=a->B; 3592 MatScalar *aa; 3593 Mat_SeqAIJ *aij; 3594 PetscInt *garray = a->garray,*colsub,Ncols; 3595 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3596 IS iscol_sub,iscmap; 3597 const PetscInt *is_idx,*cmap; 3598 PetscBool allcolumns=PETSC_FALSE; 3599 MPI_Comm comm; 3600 3601 
  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the column sub-IS, column map and sequential submatrix composed by a previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap.
       Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* keep only indices owned locally (diagonal block) or present in garray (off-diagonal block);
         garray is sorted, so k only advances (relies on iscol_local being sorted) */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* translate Msub column to newmat global column */
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
    in local and then by concatenating the local matrices the end result.
    Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  /* Build (or update) the local sequential copy of the submatrix */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* copy rows of the sequential submatrix into the parallel matrix row by row */
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Implementation of MatMPIAIJSetPreallocationCSR(): preallocates from local CSR arrays and inserts values */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  /* debug-only validation: rows sorted enough that first/last entries bound the column range */
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  /* count diagonal-block vs off-diagonal-block nonzeros per row for preallocation */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* all entries are local by construction; skip off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
       thus you CANNOT change the matrix entries by changing the values of v[] after you have
       called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3999 4000 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4001 4002 The format which is used for the sparse matrix input, is equivalent to a 4003 row-major ordering.. i.e for the following matrix, the input data expected is 4004 as shown 4005 4006 $ 1 0 0 4007 $ 2 0 3 P0 4008 $ ------- 4009 $ 4 5 6 P1 4010 $ 4011 $ Process0 [P0]: rows_owned=[0,1] 4012 $ i = {0,1,3} [size = nrow+1 = 2+1] 4013 $ j = {0,0,2} [size = 3] 4014 $ v = {1,2,3} [size = 3] 4015 $ 4016 $ Process1 [P1]: rows_owned=[2] 4017 $ i = {0,3} [size = nrow+1 = 1+1] 4018 $ j = {0,1,2} [size = 3] 4019 $ v = {4,5,6} [size = 3] 4020 4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4022 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4023 @*/ 4024 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4025 { 4026 PetscErrorCode ierr; 4027 4028 PetscFunctionBegin; 4029 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4030 PetscFunctionReturn(0); 4031 } 4032 4033 /*@C 4034 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4035 (the default parallel PETSc format). For good matrix assembly performance 4036 the user should preallocate the matrix storage by setting the parameters 4037 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4038 performance can be increased by more than a factor of 50. 4039 4040 Collective 4041 4042 Input Parameters: 4043 + B - the matrix 4044 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4045 (same value is used for all local rows) 4046 . 
  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix.
In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the type-specific implementation composed as "MatMPIAIJSetPreallocation_C";
     PetscTryMethod() is a no-op if the matrix type does not provide one */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering.. i.e for the following matrix, the input data expected is
   as shown

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* CSR row offsets must start at zero; a nonzero i[0] suggests 1-based or shifted input */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* copies the caller's CSR arrays into the matrix's internal storage */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.
Only the numerical values are updated the other arrays must be identical 4245 4246 Collective 4247 4248 Input Parameters: 4249 + mat - the matrix 4250 . m - number of local rows (Cannot be PETSC_DECIDE) 4251 . n - This value should be the same as the local size used in creating the 4252 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4253 calculated if N is given) For square matrices n is almost always m. 4254 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4255 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4256 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4257 . J - column indices 4258 - v - matrix values 4259 4260 Level: intermediate 4261 4262 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4263 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4264 @*/ 4265 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4266 { 4267 PetscErrorCode ierr; 4268 PetscInt cstart,nnz,i,j; 4269 PetscInt *ld; 4270 PetscBool nooffprocentries; 4271 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4272 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4273 PetscScalar *ad = Ad->a, *ao = Ao->a; 4274 const PetscInt *Adi = Ad->i; 4275 PetscInt ldi,Iii,md; 4276 4277 PetscFunctionBegin; 4278 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4279 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4280 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4281 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4282 4283 cstart = mat->cmap->rstart; 4284 if (!Aij->ld) { 4285 /* count number of entries below block diagonal */ 4286 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4287 Aij->ld = ld; 4288 for (i=0; i<m; i++) { 4289 nnz = Ii[i+1]- Ii[i]; 4290 j = 0; 4291 while (J[j] < cstart && j < nnz) {j++;} 4292 J += nnz; 4293 ld[i] = j; 4294 } 4295 } else { 4296 ld = Aij->ld; 4297 } 4298 4299 for (i=0; i<m; i++) { 4300 nnz = Ii[i+1]- Ii[i]; 4301 Iii = Ii[i]; 4302 ldi = ld[i]; 4303 md = Adi[i+1]-Adi[i]; 4304 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4305 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4306 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4307 ad += md; 4308 ao += nnz - md; 4309 } 4310 nooffprocentries = mat->nooffprocentries; 4311 mat->nooffprocentries = PETSC_TRUE; 4312 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4313 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4314 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4315 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4316 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4317 mat->nooffprocentries = nooffprocentries; 4318 PetscFunctionReturn(0); 4319 } 4320 4321 /*@C 4322 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4323 (the default parallel PETSc format). For good matrix assembly performance 4324 the user should preallocate the matrix storage by setting the parameters 4325 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4326 performance can be increased by more than a factor of 50. 4327 4328 Collective 4329 4330 Input Parameters: 4331 + comm - MPI communicator 4332 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4333 This value should be the same as the local size used in creating the 4334 y vector for the matrix-vector product y = Ax. 4335 . n - This value should be the same as the local size used in creating the 4336 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4337 calculated if N is given) For square matrices n is almost always m. 4338 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4339 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4340 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4341 (same value is used for all local rows) 4342 . d_nnz - array containing the number of nonzeros in the various rows of the 4343 DIAGONAL portion of the local submatrix (possibly different for each row) 4344 or NULL, if d_nz is used to specify the nonzero structure. 4345 The size of this array is equal to the number of local rows, i.e 'm'. 4346 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4347 submatrix (same value is used for all local rows). 4348 - o_nnz - array containing the number of nonzeros in the various rows of the 4349 OFF-DIAGONAL portion of the local submatrix (possibly different for 4350 each row) or NULL, if o_nz is used to specify the nonzero 4351 structure. The size of this array is equal to the number 4352 of local rows, i.e 'm'. 4353 4354 Output Parameter: 4355 . A - the matrix 4356 4357 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4358 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4399 4400 When calling this routine with a single process communicator, a matrix of 4401 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4402 type of communicator, use the construction mechanism 4403 .vb 4404 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4405 .ve 4406 4407 $ MatCreate(...,&A); 4408 $ MatSetType(A,MATMPIAIJ); 4409 $ MatSetSizes(A, m,n,M,N); 4410 $ MatMPIAIJSetPreallocation(A,...); 4411 4412 By default, this format uses inodes (identical nodes) when possible. 4413 We search for consecutive rows with the same nonzero structure, thereby 4414 reusing matrix information to achieve increased efficiency. 4415 4416 Options Database Keys: 4417 + -mat_no_inode - Do not use inodes 4418 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4419 4420 4421 4422 Example usage: 4423 4424 Consider the following 8x8 matrix with 34 non-zero values, that is 4425 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4426 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4427 as follows 4428 4429 .vb 4430 1 2 0 | 0 3 0 | 0 4 4431 Proc0 0 5 6 | 7 0 0 | 8 0 4432 9 0 10 | 11 0 0 | 12 0 4433 ------------------------------------- 4434 13 0 14 | 15 16 17 | 0 0 4435 Proc1 0 18 0 | 19 20 21 | 0 0 4436 0 0 0 | 22 23 0 | 24 0 4437 ------------------------------------- 4438 Proc2 25 26 27 | 0 0 28 | 29 0 4439 30 0 0 | 31 32 33 | 0 34 4440 .ve 4441 4442 This can be represented as a collection of submatrices as 4443 4444 .vb 4445 A B C 4446 D E F 4447 G H I 4448 .ve 4449 4450 Where the submatrices A,B,C are owned by proc0, D,E,F are 4451 owned by proc1, G,H,I are owned by proc2. 4452 4453 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4454 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4455 The 'M','N' parameters are 8,8, and have the same values on all procs. 
   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* single-process communicators get a SEQAIJ matrix, everything else MPIAIJ */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of an MPIAIJ matrix,
   and the local-to-global column map (garray) of the off-diagonal block.
   Any output pointer may be NULL if the caller does not need it. */
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* prefix match so MPIAIJ subtypes (e.g. type names beginning with "mpiaij") also pass */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/* Stacks each process's sequential matrix (m x N) on top of one another, in rank
   order, to form one parallel AIJ matrix with sum(m) global rows and N columns.
   n is the local column size of the result (or PETSC_DECIDE). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* prefix-sum of local row counts gives this rank's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    /* both preallocation calls are made; only the one matching the actual
       type (seq on one process, mpi otherwise) takes effect */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Writes each process's local rows of A as a separate sequential matrix into
   its own binary file named "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  /* copy this rank's owned rows of A into the local sequential matrix B */
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* write B to "<outfile>.<rank>"; +5 leaves room for '.', up to 4 rank digits (and NUL
     is covered since len excludes it) -- NOTE(review): ranks >= 10000 would overflow; confirm */
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy routine installed on matrices built by MatCreateMPIAIJSumSeqAIJSymbolic():
   frees the attached Mat_Merge_SeqsToMPI support structure (composed under the key
   "MatMergeSeqsToMPI"), then delegates to the regular MPIAIJ destroy. */
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of merging per-process sequential matrices into the parallel
   matrix created by MatCreateMPIAIJSumSeqAIJSymbolic(): communicates the value
   arrays and accumulates (sums) them into mpimat's rows. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge support structure attached during the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr =
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* ship the values of seqmat's rows owned by [proc] directly from a->a */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge: both bj_i and aj are sorted, and aj's columns are a subset of bj_i's */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase: builds the parallel nonzero structure for summing per-process
   sequential matrices, attaches a Mat_Merge_SeqsToMPI support structure to the
   result for reuse by MatCreateMPIAIJSumSeqAIJNumeric(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only rows that actually hold nonzeros; empty rows are not sent */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
   matrices from each processor

Collective 5030 5031 Input Parameters: 5032 + comm - the communicators the parallel matrix will live on 5033 . seqmat - the input sequential matrices 5034 . m - number of local rows (or PETSC_DECIDE) 5035 . n - number of local columns (or PETSC_DECIDE) 5036 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5037 5038 Output Parameter: 5039 . mpimat - the parallel matrix generated 5040 5041 Level: advanced 5042 5043 Notes: 5044 The dimensions of the sequential matrix in each processor MUST be the same. 5045 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5046 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5047 @*/ 5048 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5049 { 5050 PetscErrorCode ierr; 5051 PetscMPIInt size; 5052 5053 PetscFunctionBegin; 5054 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5055 if (size == 1) { 5056 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5057 if (scall == MAT_INITIAL_MATRIX) { 5058 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5059 } else { 5060 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5061 } 5062 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5063 PetscFunctionReturn(0); 5064 } 5065 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5066 if (scall == MAT_INITIAL_MATRIX) { 5067 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5068 } 5069 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5070 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5071 PetscFunctionReturn(0); 5072 } 5073 5074 /*@ 5075 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5076 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
    with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* accept any subtype whose type name begins with "mpiaij" */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on one process the diagonal block IS the local matrix; with REUSE nothing needs updating */
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      /* wrap the diagonal block's arrays directly; no merge needed */
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    /* row i of the result has all diag-block plus all off-diag-block nonzeros of row i */
    ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    /* Merge in global-column order: off-diag columns below cstart (mapped via garray),
       then the diagonal block (shifted by cstart), then off-diag columns above.
       The aa/ba/aj/bj pointers advance monotonically across rows. */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (columns left of the diagonal block) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: only refresh the values, in the same merge order as above */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns, in increasing global order:
       off-diag columns below the diagonal block (from garray), the local
       (diagonal-block) columns, then off-diag columns above */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Destroy a mat that may be composed with PetscSF communication objects.
 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
 * */
PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
{
  PetscSF        sf,osf;
  IS             map;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* NOTE(review): PetscObjectQuery() returns borrowed references; the destroys
     below appear to drop the references taken at compose time in
     MatCreateSeqSubMatrixWithRows_Private -- confirm against PetscObjectCompose semantics */
  ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
  ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
  ierr = ISDestroy(&map);CHKERRQ(ierr);
  /* chain to the base SeqAIJ destroy */
  ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices. A whole column is extracted once a row is matched.
 * Row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
* */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* per-row counts and running offsets, interleaved as (diag,offdiag) pairs */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build leaf->root graphs that gather every nonzero entry of each requested row */
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix; the shift is undone below once the
     broadcast of pd->j has completed */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* We want P_oth store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  /* translate po->j to global indices in place; translated back further below */
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  nout = 0;
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  /* ``New MatDestroy" takes care of PetscSF objects as well */
  (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode
MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5456 { 5457 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5458 Mat_SeqAIJ *p_oth; 5459 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5460 IS rows,map; 5461 PetscHMapI hamp; 5462 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5463 MPI_Comm comm; 5464 PetscSF sf,osf; 5465 PetscBool has; 5466 PetscErrorCode ierr; 5467 5468 PetscFunctionBegin; 5469 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5470 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5471 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5472 * and then create a submatrix (that often is an overlapping matrix) 5473 * */ 5474 if (reuse==MAT_INITIAL_MATRIX) { 5475 /* Use a hash table to figure out unique keys */ 5476 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5477 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5478 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5479 count = 0; 5480 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5481 for (i=0;i<a->B->cmap->n;i++) { 5482 key = a->garray[i]/dof; 5483 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5484 if (!has) { 5485 mapping[i] = count; 5486 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5487 } else { 5488 /* Current 'i' has the same value the previous step */ 5489 mapping[i] = count-1; 5490 } 5491 } 5492 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5493 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5494 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5495 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5496 off = 0; 5497 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5498 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5499 ierr = 
PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5500 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5501 /* In case, the matrix was already created but users want to recreate the matrix */ 5502 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5503 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5504 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5505 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5506 } else if (reuse==MAT_REUSE_MATRIX) { 5507 /* If matrix was already created, we simply update values using SF objects 5508 * that as attached to the matrix ealier. 5509 * */ 5510 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5511 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5512 if (!sf || !osf) { 5513 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5514 } 5515 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5516 /* Update values in place */ 5517 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5518 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5519 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5520 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5521 } else { 5522 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5523 } 5524 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5525 PetscFunctionReturn(0); 5526 } 5527 5528 /*@C 5529 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5530 5531 Collective on Mat 5532 5533 Input Parameters: 5534 + A,B - the matrices in mpiaij format 5535 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+  rowb, colb - index sets of rows and columns of B to extract
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* rows of B must align with the columns of A for the product structure to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* build the row IS from the nonzero columns of local A, in increasing
       global order: off-diag cols below cstart, local cols, off-diag cols above */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller (for later reuse) or destroy our temporaries */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5616 5617 Level: developer 5618 5619 */ 5620 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5621 { 5622 PetscErrorCode ierr; 5623 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5624 Mat_SeqAIJ *b_oth; 5625 VecScatter ctx; 5626 MPI_Comm comm; 5627 const PetscMPIInt *rprocs,*sprocs; 5628 const PetscInt *srow,*rstarts,*sstarts; 5629 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5630 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5631 PetscScalar *b_otha,*bufa,*bufA,*vals; 5632 MPI_Request *rwaits = NULL,*swaits = NULL; 5633 MPI_Status rstatus; 5634 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5635 5636 PetscFunctionBegin; 5637 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5638 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5639 5640 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5641 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5642 } 5643 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5644 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5645 5646 if (size == 1) { 5647 startsj_s = NULL; 5648 bufa_ptr = NULL; 5649 *B_oth = NULL; 5650 PetscFunctionReturn(0); 5651 } 5652 5653 ctx = a->Mvctx; 5654 tag = ((PetscObject)ctx)->tag; 5655 5656 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5657 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5658 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5659 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not 
needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5660 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5661 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5662 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5663 5664 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5665 if (scall == MAT_INITIAL_MATRIX) { 5666 /* i-array */ 5667 /*---------*/ 5668 /* post receives */ 5669 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5670 for (i=0; i<nrecvs; i++) { 5671 rowlen = rvalues + rstarts[i]*rbs; 5672 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5673 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5674 } 5675 5676 /* pack the outgoing message */ 5677 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5678 5679 sstartsj[0] = 0; 5680 rstartsj[0] = 0; 5681 len = 0; /* total length of j or a array to be sent */ 5682 if (nsends) { 5683 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5684 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5685 } 5686 for (i=0; i<nsends; i++) { 5687 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5688 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5689 for (j=0; j<nrows; j++) { 5690 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5691 for (l=0; l<sbs; l++) { 5692 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5693 5694 rowlen[j*sbs+l] = ncols; 5695 5696 len += ncols; 5697 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5698 } 5699 k++; 5700 } 5701 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5702 5703 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5704 } 5705 /* recvs and sends of i-array are completed */ 5706 i = nrecvs; 5707 
while (i--) { 5708 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5709 } 5710 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5711 ierr = PetscFree(svalues);CHKERRQ(ierr); 5712 5713 /* allocate buffers for sending j and a arrays */ 5714 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5715 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5716 5717 /* create i-array of B_oth */ 5718 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5719 5720 b_othi[0] = 0; 5721 len = 0; /* total length of j or a array to be received */ 5722 k = 0; 5723 for (i=0; i<nrecvs; i++) { 5724 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5725 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5726 for (j=0; j<nrows; j++) { 5727 b_othi[k+1] = b_othi[k] + rowlen[j]; 5728 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5729 k++; 5730 } 5731 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5732 } 5733 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5734 5735 /* allocate space for j and a arrrays of B_oth */ 5736 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5737 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5738 5739 /* j-array */ 5740 /*---------*/ 5741 /* post receives of j-array */ 5742 for (i=0; i<nrecvs; i++) { 5743 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5744 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5745 } 5746 5747 /* pack the outgoing message j-array */ 5748 if (nsends) k = sstarts[0]; 5749 for (i=0; i<nsends; i++) { 5750 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5751 bufJ = bufj+sstartsj[i]; 5752 for (j=0; j<nrows; j++) { 5753 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5754 for (ll=0; ll<sbs; ll++) { 5755 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5756 for (l=0; l<ncols; l++) { 5757 *bufJ++ = cols[l]; 5758 } 
5759 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5760 } 5761 } 5762 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5763 } 5764 5765 /* recvs and sends of j-array are completed */ 5766 i = nrecvs; 5767 while (i--) { 5768 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5769 } 5770 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5771 } else if (scall == MAT_REUSE_MATRIX) { 5772 sstartsj = *startsj_s; 5773 rstartsj = *startsj_r; 5774 bufa = *bufa_ptr; 5775 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5776 b_otha = b_oth->a; 5777 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5778 5779 /* a-array */ 5780 /*---------*/ 5781 /* post receives of a-array */ 5782 for (i=0; i<nrecvs; i++) { 5783 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5784 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5785 } 5786 5787 /* pack the outgoing message a-array */ 5788 if (nsends) k = sstarts[0]; 5789 for (i=0; i<nsends; i++) { 5790 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5791 bufA = bufa+sstartsj[i]; 5792 for (j=0; j<nrows; j++) { 5793 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5794 for (ll=0; ll<sbs; ll++) { 5795 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5796 for (l=0; l<ncols; l++) { 5797 *bufA++ = vals[l]; 5798 } 5799 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5800 } 5801 } 5802 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5803 } 5804 /* recvs and sends of a-array are completed */ 5805 i = nrecvs; 5806 while (i--) { 5807 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5808 } 5809 if (nsends) {ierr = 
MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5810 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5811 5812 if (scall == MAT_INITIAL_MATRIX) { 5813 /* put together the new matrix */ 5814 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5815 5816 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5817 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5818 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5819 b_oth->free_a = PETSC_TRUE; 5820 b_oth->free_ij = PETSC_TRUE; 5821 b_oth->nonew = 0; 5822 5823 ierr = PetscFree(bufj);CHKERRQ(ierr); 5824 if (!startsj_s || !bufa_ptr) { 5825 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5826 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5827 } else { 5828 *startsj_s = sstartsj; 5829 *startsj_r = rstartsj; 5830 *bufa_ptr = bufa; 5831 } 5832 } 5833 5834 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5835 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5836 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5837 PetscFunctionReturn(0); 5838 } 5839 5840 /*@C 5841 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5842 5843 Not Collective 5844 5845 Input Parameters: 5846 . A - The matrix in mpiaij format 5847 5848 Output Parameter: 5849 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5850 . 
colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  /* Casts A->data without a type check: assumes A is MATMPIAIJ -- TODO confirm callers guarantee this */
  a = (Mat_MPIAIJ*) A->data;
  /* NOTE(review): PetscValidPointer() above already rejects NULL output pointers,
     so these guards look redundant; kept byte-identical for safety */
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

/* Conversion/product kernels implemented in other translation units; registered in MatCreate_MPIAIJ() below */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5894 /* 5895 Computes (B'*A')' since computing B*A directly is untenable 5896 5897 n p p 5898 ( ) ( ) ( ) 5899 m ( A ) * n ( B ) = m ( C ) 5900 ( ) ( ) ( ) 5901 5902 */ 5903 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5904 { 5905 PetscErrorCode ierr; 5906 Mat At,Bt,Ct; 5907 5908 PetscFunctionBegin; 5909 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5910 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5911 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5912 ierr = MatDestroy(&At);CHKERRQ(ierr); 5913 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5914 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5915 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5916 PetscFunctionReturn(0); 5917 } 5918 5919 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5920 { 5921 PetscErrorCode ierr; 5922 PetscInt m=A->rmap->n,n=B->cmap->n; 5923 Mat Cmat; 5924 5925 PetscFunctionBegin; 5926 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5927 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5928 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5929 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5930 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5931 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5932 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5933 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5934 5935 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5936 5937 *C = Cmat; 5938 PetscFunctionReturn(0); 5939 } 5940 5941 /* ----------------------------------------------------------------*/ 5942 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5943 { 5944 PetscErrorCode ierr; 5945 5946 PetscFunctionBegin; 5947 if (scall == 
MAT_INITIAL_MATRIX) { 5948 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5949 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5950 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5951 } 5952 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5953 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5954 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5955 PetscFunctionReturn(0); 5956 } 5957 5958 /*MC 5959 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5960 5961 Options Database Keys: 5962 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5963 5964 Level: beginner 5965 5966 Notes: 5967 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 5968 in this case the values associated with the rows and columns one passes in are set to zero 5969 in the matrix 5970 5971 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ implementation
   struct, installs the function table, creates the stash used to cache
   off-process entries set with MatSetValues(), and registers all composed
   operations (preallocation, conversions, products). Preallocation itself
   happens later via MatMPIAIJSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash = PETSC_FALSE;
  b->colmap = 0;                  /* global-to-local column map, built lazily */
  b->garray = 0;                  /* global indices of off-diagonal columns, built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices = 0;
  b->rowvalues = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register composed operations; the "..._C" strings are the query keys used by PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.
6078 6079 The i and j indices are 0 based 6080 6081 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6082 6083 This sets local rows and cannot be used to set off-processor values. 6084 6085 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6086 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6087 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6088 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6089 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6090 communication if it is known that only local entries will be set. 6091 6092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6093 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6094 @*/ 6095 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6096 { 6097 PetscErrorCode ierr; 6098 Mat_MPIAIJ *maij; 6099 6100 PetscFunctionBegin; 6101 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6102 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6103 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6104 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6105 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6106 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6107 maij = (Mat_MPIAIJ*) (*mat)->data; 6108 6109 (*mat)->preallocated = PETSC_TRUE; 6110 6111 ierr = 
PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6112 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6113 6114 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6115 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6116 6117 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6118 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6119 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6120 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6121 6122 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6123 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6124 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6125 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6126 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6127 PetscFunctionReturn(0); 6128 } 6129 6130 /* 6131 Special version for direct calls from Fortran 6132 */ 6133 #include <petsc/private/fortranimpl.h> 6134 6135 /* Change these macros so can be used in void function */ 6136 #undef CHKERRQ 6137 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6138 #undef SETERRQ2 6139 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6140 #undef SETERRQ3 6141 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6142 #undef SETERRQ 6143 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6144 6145 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6146 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6147 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6148 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6149 #else 6150 #endif 6151 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6152 { 6153 Mat mat = *mmat; 6154 PetscInt m = *mm, n 
= *mn; 6155 InsertMode addv = *maddv; 6156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6157 PetscScalar value; 6158 PetscErrorCode ierr; 6159 6160 MatCheckPreallocated(mat,1); 6161 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6162 6163 #if defined(PETSC_USE_DEBUG) 6164 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6165 #endif 6166 { 6167 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6168 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6169 PetscBool roworiented = aij->roworiented; 6170 6171 /* Some Variables required in the macro */ 6172 Mat A = aij->A; 6173 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6174 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6175 MatScalar *aa = a->a; 6176 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6177 Mat B = aij->B; 6178 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6179 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6180 MatScalar *ba = b->a; 6181 6182 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6183 PetscInt nonew = a->nonew; 6184 MatScalar *ap1,*ap2; 6185 6186 PetscFunctionBegin; 6187 for (i=0; i<m; i++) { 6188 if (im[i] < 0) continue; 6189 #if defined(PETSC_USE_DEBUG) 6190 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6191 #endif 6192 if (im[i] >= rstart && im[i] < rend) { 6193 row = im[i] - rstart; 6194 lastcol1 = -1; 6195 rp1 = aj + ai[row]; 6196 ap1 = aa + ai[row]; 6197 rmax1 = aimax[row]; 6198 nrow1 = ailen[row]; 6199 low1 = 0; 6200 high1 = nrow1; 6201 lastcol2 = -1; 6202 rp2 = bj + bi[row]; 6203 ap2 = ba + bi[row]; 6204 rmax2 = bimax[row]; 6205 nrow2 = bilen[row]; 6206 low2 = 0; 6207 high2 = nrow2; 6208 6209 for (j=0; j<n; j++) { 6210 
if (roworiented) value = v[i*n+j]; 6211 else value = v[i+j*m]; 6212 if (in[j] >= cstart && in[j] < cend) { 6213 col = in[j] - cstart; 6214 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 6215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6216 } else if (in[j] < 0) continue; 6217 #if defined(PETSC_USE_DEBUG) 6218 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6219 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 6220 #endif 6221 else { 6222 if (mat->was_assembled) { 6223 if (!aij->colmap) { 6224 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6225 } 6226 #if defined(PETSC_USE_CTABLE) 6227 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6228 col--; 6229 #else 6230 col = aij->colmap[in[j]] - 1; 6231 #endif 6232 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 6233 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6234 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6235 col = in[j]; 6236 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6237 B = aij->B; 6238 b = (Mat_SeqAIJ*)B->data; 6239 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6240 rp2 = bj + bi[row]; 6241 ap2 = ba + bi[row]; 6242 rmax2 = bimax[row]; 6243 nrow2 = bilen[row]; 6244 low2 = 0; 6245 high2 = nrow2; 6246 bm = aij->B->rmap->n; 6247 ba = b->a; 6248 } 6249 } else col = in[j]; 6250 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6251 } 6252 } 6253 } else if (!aij->donotstash) { 6254 if (roworiented) { 6255 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6256 } else { 6257 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 
6258 } 6259 } 6260 } 6261 } 6262 PetscFunctionReturnVoid(); 6263 } 6264